diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a78220d --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +*.pyc +*.sqlite +yyy +zzz_* diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..ff2bff1 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,41 @@ +Changelog +========= + +v2.0.0 (2016-09-09) +------------------- + +#. Completely rewritten, works in PY2 and PY3 +#. Support new Kobo db schema + + +v1.0.5 (2016-09-03) +------------------- + +#. Tested new Kobo DB schema + + +v1.0.4 (2016-06-23) +------------------- + +#. Emergency fix for new Kobo DB schema + + +v1.0.3 (2015-01-23) +------------------- + +#. Added options -l and -b (suggested by Pierre-Arnaud Rabier) + + +v1.0.2 (2013-05-11) +------------------- + +#. Better usage text + + +v1.0.1 (2013-04-30) +------------------- + +#. Initial release + + + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..64f4400 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2013-2016 Alberto Pettarin (alberto@albertopettarin.it) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..94211f4 --- /dev/null +++ b/README.md @@ -0,0 +1,181 @@ +# export-kobo + +A Python tool to export annotations and highlights from a Kobo SQLite file. + +* Version: 2.0.0 +* Date: 2016-09-08 +* Developer: [Alberto Pettarin](http://www.albertopettarin.it/) +* License: the MIT License (MIT) +* Contact: [click here](http://www.albertopettarin.it/contact.html) + +## Usage + +```bash +$ # print all annotations and highlights to stdout +$ python export-kobo.py KoboReader.sqlite + +$ # print the help +$ python export-kobo.py --help + +$ # export to file instead of stdout +$ python export-kobo.py KoboReader.sqlite --output /path/to/out.txt + +$ # export as CSV +$ python export-kobo.py KoboReader.sqlite --csv + +$ # export as CSV to file +$ python export-kobo.py KoboReader.sqlite --csv --output /path/to/out.csv + +$ # export annotations only +$ python export-kobo.py KoboReader.sqlite --annotations-only + +$ # export highlights only +$ python export-kobo.py KoboReader.sqlite --highlights-only + +$ # export as CSV to file annotations only +$ python export-kobo.py KoboReader.sqlite --csv --annotations-only --output /path/to/out.txt + +$ # print the list of books with annotations or highlights to stdout +$ python export-kobo.py KoboReader.sqlite --list + +$ # as above, but export to file +$ python export-kobo.py KoboReader.sqlite --list --output /path/to/out.txt + +$ # as above, but export in CSV format +$ python export-kobo.py KoboReader.sqlite --list --csv --output /path/to/out.txt + +$ # export annotations and highlights for the book "Alice in Wonderland" +$ python export-kobo.py KoboReader.sqlite --book "Alice in Wonderland" + +$ # as above, 
assuming "Alice in Wonderland" has ID "12" in the list printed by --list +$ python export-kobo.py KoboReader.sqlite --bookid 12 +``` + + +## Installation + +1. Install Python, 3.x (recommended), or 2.7.x, + and make sure you have the ``python`` command available in your shell; + +2. Clone this repository: + ```bash + $ git clone https://github.com/pettarin/export-kobo + ``` + or manually download the ZIP file from the [Releases tab](https://github.com/pettarin/export-kobo/Releases/) and unzip it somewhere; + +3. Enter the directory where ``export-kobo.py`` is: + ```bash + $ cd export-kobo + ``` + +4. Copy in the same directory the ``KoboReader.sqlite`` file + from the ``.kobo/`` hidden directory of the USB drive + that appears when you plug your Kobo device to the USB port of your PC. + You might need to enable the ``View hidden files`` option + in your file manager to see the hidden directory; + +5. Now you can run the script as explained above, for example: + ```bash + $ python export-kobo.py KoboReader.sqlite + ``` + + +## Troubleshooting + +### I am on Windows, and I get this error: ``python is not recognized as an internal or external command, operable program or batch file`` + +Make sure you installed Python for your current user +(e.g., check the ``Install for all users`` option in the Python installer), +and that the directory containing the ``python.exe`` executable +is in your ``PATH`` environment variable +(e.g., check the ``Add Python to my PATH`` option in the Python installer). + +If you have already installed Python, but it is not in your ``PATH``, see +[this page](https://docs.python.org/3/using/windows.html) +for directions to solve this issue. + +### I got lots of question marks (``?``) in my output + +If you are using Python 2.7.x, try switching to Python 3.x, +which has saner support for Unicode characters. + +You might also want to use the ``--output FILE`` switch +to output to file instead of printing to standard output. 
+ +### I ran the script, but I obtained too much data + +If you want to output annotations or highlights for a single book, +you can use the ``--list`` option to list all books with annotations or highlights, +and then use ``--book`` or ``--bookid`` to export only those you are interested in: + +``` bash +$ python export-kobo.py KoboReader.sqlite --list +ID Title +1 Alice in Wonderland +2 Moby Dick +3 Sonnets +... + +$ python export-kobo.py KoboReader.sqlite --book "Alice in Wonderland" +... +$ python export-kobo.py KoboReader.sqlite --bookid 1 +... +``` + +Alternatively, you can export to a CSV file with ``--csv --output FILE`` +and then open the resulting output file with a spreadsheet application, +disregarding the annotations/highlights you are not interested in: + +```bash +$ python export-kobo.py KoboReader.sqlite --csv --output notes.csv +$ libreoffice notes.csv +``` + +### I filtered my notes by book title with ``--book``, but I got no results + +Check that you wrote the book title exactly as printed by ``--list`` +(e.g., copy-and-paste it), or use ``--bookid`` instead. + + +## Notes + +1. Around May 2016 Kobo changed the schema + of their ``KoboReader.sqlite`` database with a firmware update. + The ``export-kobo.py`` script in the main directory of this repository + works for this **new** database schema. + If you still have an old firmware on your Kobo, + and hence the old database schema, + you might want to use one of the scripts in the ``old/`` directory. + Note, however, that those scripts are very old, possibly buggy, + and they are no longer supported. + +2. Since I no longer use a Kobo eReader, + this project is maintained in "legacy mode". + Changes to the schema of the ``KoboReader.sqlite`` database + can be reflected on the code + only thanks to users sending me their ``KoboReader.sqlite`` file, + for me to study its schema. + +3. 
Bear in mind that no official specifications are published by Kobo, + hence the script works only as long as + my understanding of the database structure of ``KoboReader.sqlite`` is correct, + and its schema remains the same. + +4. Although the script only reads from the ``KoboReader.sqlite`` file, + it is advisable to make a copy of it on your PC + and export your notes from this copy, + instead of directly accessing the file on your Kobo eReader device. + + +## Acknowledgments + +* Pierre-Arnaud Rabier suggested adding an option to extract the annotations and highlights for a single ebook. +* Nick Kalogirou and Andrea Moro provided me with their KoboReader.sqlite files with the new schema. + + +## License + +**export-kobo** is released under the MIT License. + + + diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..227cea2 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +2.0.0 diff --git a/export-kobo.py b/export-kobo.py new file mode 100644 index 0000000..a26aaa8 --- /dev/null +++ b/export-kobo.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python +# coding=utf-8 + +# The MIT License (MIT) +# +# Copyright (c) 2013-2016 Alberto Pettarin (alberto@albertopettarin.it) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
Export annotations and highlights from a Kobo SQLite file.
"""

from __future__ import absolute_import
from __future__ import print_function
import argparse
import csv
import io
import os
import sqlite3
import sys

__author__ = "Alberto Pettarin"
__email__ = "alberto@albertopettarin.it"
__copyright__ = "Copyright 2016, Alberto Pettarin (www.albertopettarin.it)"
__license__ = "MIT"
__status__ = "Production"
__version__ = "2.0.0"


PY2 = (sys.version_info[0] == 2)


class CommandLineTool(object):
    """
    Minimal base class for an argparse-driven command line tool.

    Subclasses override ``AP_PROGRAM``, ``AP_DESCRIPTION`` and
    ``AP_ARGUMENTS``, and implement ``actual_command()``.
    """

    # overload in the actual subclass
    #
    AP_PROGRAM = sys.argv[0]
    AP_DESCRIPTION = u"Generic Command Line Tool"
    AP_ARGUMENTS = [
        # required args
        # {"name": "foo", "nargs": 1, "type": str, "default": "baz", "help": "Foo help"},
        #
        # optional args
        # {"name": "--bar", "nargs": "?", "type": str, "default": "foofoofoo", "help": "Bar help"},
        # {"name": "--quiet", "action": "store_true", "help": "Do not output to stdout"},
    ]

    def __init__(self):
        """
        Build the argument parser from ``AP_ARGUMENTS``.

        Each entry is a dict with a mandatory ``"name"`` and either an
        ``"action"`` (flag-like argument) or the optional keys
        ``"nargs"``, ``"type"``, ``"default"`` and ``"help"``.
        """
        self.parser = argparse.ArgumentParser(
            prog=self.AP_PROGRAM,
            description=self.AP_DESCRIPTION
        )
        # filled by run() with the parsed arguments, as a dict
        self.vargs = None
        for arg in self.AP_ARGUMENTS:
            if "action" in arg:
                self.parser.add_argument(
                    arg["name"],
                    action=arg["action"],
                    help=arg.get("help")
                )
            else:
                self.parser.add_argument(
                    arg["name"],
                    nargs=arg.get("nargs"),
                    type=arg.get("type", str),
                    default=arg.get("default"),
                    help=arg.get("help")
                )

    def run(self):
        """
        Parse ``sys.argv``, run ``actual_command()`` and exit with status 0.
        """
        self.vargs = vars(self.parser.parse_args())
        self.actual_command()
        sys.exit(0)

    # overload this in your actual subclass
    def actual_command(self):
        """
        Do the real work of the tool; the base implementation only prints a notice.
        """
        self.print_stdout(u"This script does nothing. Invoke another .py")

    def error(self, message):
        """
        Print ``message`` to stderr, prefixed by ``ERROR:``, and exit with status 1.
        """
        self.print_stderr(u"ERROR: %s" % message)
        sys.exit(1)

    def print_stdout(self, *args, **kwargs):
        """
        Print to the standard output; same arguments as ``print()``.
        """
        print(*args, **kwargs)

    def print_stderr(self, *args, **kwargs):
        """
        Print to the standard error; same arguments as ``print()`` except ``file``.
        """
        print(*args, file=sys.stderr, **kwargs)


class Item(object):
    """
    One row returned by ``ExportKobo.QUERY_ITEMS``.

    ``values`` must hold the eight selected columns in query order:
    VolumeID, Text, Annotation, ExtraAnnotationData, DateCreated,
    DateModified, BookTitle, Title.

    ``kind`` is ``ANNOTATION`` when both text and annotation are non-empty,
    ``HIGHLIGHT`` when only the text is non-empty, ``BOOKMARK`` otherwise.
    """

    ANNOTATION = "annotation"
    BOOKMARK = "bookmark"
    HIGHLIGHT = "highlight"

    def __init__(self, values):
        self.volumeid = values[0]
        self.text = values[1]
        self.annotation = values[2]
        self.extraannotationdata = values[3]
        self.datecreated = values[4]
        self.datemodified = values[5]
        self.booktitle = values[6]
        self.title = values[7]
        has_text = self._present(self.text)
        has_annotation = self._present(self.annotation)
        if has_text and has_annotation:
            self.kind = self.ANNOTATION
        elif has_text:
            self.kind = self.HIGHLIGHT
        else:
            self.kind = self.BOOKMARK

    @staticmethod
    def _present(value):
        """
        Return True if ``value`` is neither ``None`` nor the empty string.
        """
        return (value is not None) and (value != "")

    def csv_tuple(self):
        """
        Return the fields exported in CSV mode, as a 6-tuple.
        """
        return (self.kind, self.title, self.datecreated, self.datemodified, self.annotation, self.text)

    def __repr__(self):
        return u"(%s, %s, %s, %s, %s, %s)" % self.csv_tuple()

    def __str__(self):
        """
        Return the human-readable rendering; empty for bookmarks.
        """
        if self.kind not in (self.ANNOTATION, self.HIGHLIGHT):
            return u""
        sep = u"\n=== === ===\n"
        acc = [
            u"Type: %s" % (self.kind),
            u"Title: %s" % (self.title),
            u"Date created: %s" % (self.datecreated),
        ]
        if self.kind == self.ANNOTATION:
            acc.append(u"Annotation:%s%s%s" % (sep, self.annotation, sep))
        acc.append(u"Reference text:%s%s%s" % (sep, self.text, sep))
        return u"\n".join(acc)


class Book(object):
    """
    One row returned by ``ExportKobo.QUERY_BOOKS``: VolumeID, BookTitle, Title.
    """

    def __init__(self, values):
        self.volumeid = values[0]
        self.booktitle = values[1]
        self.title = values[2]

    def __repr__(self):
        return u"(%s, %s, %s)" % (self.volumeid, self.booktitle, self.title)

    def __str__(self):
        return self.__repr__()


class ExportKobo(CommandLineTool):
    """
    Command line tool exporting annotations and highlights
    from a ``KoboReader.sqlite`` file.
    """

    AP_PROGRAM = u"export-kobo"
    AP_DESCRIPTION = u"Export annotations and highlights from a Kobo SQLite file."
    AP_ARGUMENTS = [
        {
            "name": "db",
            "nargs": None,
            "type": str,
            "default": None,
            "help": "Path of the input KoboReader.sqlite file"
        },
        {
            "name": "--output",
            "nargs": "?",
            "type": str,
            "default": None,
            "help": "Output to file instead of using the standard output"
        },
        {
            "name": "--csv",
            "action": "store_true",
            "help": "Output in CSV format instead of human-readable format"
        },
        {
            "name": "--list",
            "action": "store_true",
            "help": "List the titles of books with annotations or highlights"
        },
        {
            "name": "--book",
            "nargs": "?",
            "type": str,
            "default": None,
            "help": "Output annotations and highlights only from the book with the given title"
        },
        {
            "name": "--bookid",
            "nargs": "?",
            "type": str,
            "default": None,
            "help": "Output annotations and highlights only from the book with the given ID"
        },
        {
            "name": "--annotations-only",
            "action": "store_true",
            "help": "Outputs annotations only, excluding highlights"
        },
        {
            "name": "--highlights-only",
            "action": "store_true",
            "help": "Outputs highlights only, excluding annotations"
        },
        {
            "name": "--info",
            "action": "store_true",
            "help": "Print information about the number of annotations and highlights"
        },
    ]

    QUERY_ITEMS = (
        "SELECT "
        "Bookmark.VolumeID, "
        "Bookmark.Text, "
        "Bookmark.Annotation, "
        "Bookmark.ExtraAnnotationData, "
        "Bookmark.DateCreated, "
        "Bookmark.DateModified, "
        "content.BookTitle, "
        "content.Title "
        "FROM Bookmark INNER JOIN content "
        "ON Bookmark.VolumeID = content.ContentID;"
    )

    QUERY_BOOKS = (
        "SELECT DISTINCT "
        "Bookmark.VolumeID, "
        "content.BookTitle, "
        "content.Title "
        "FROM Bookmark INNER JOIN content "
        "ON Bookmark.VolumeID = content.ContentID "
        "ORDER BY content.Title;"
    )

    def __init__(self):
        super(ExportKobo, self).__init__()
        self.items = []

    def actual_command(self):
        """
        Export either the list of books (``--list``) or the items,
        to a file (``--output``) or to stdout, then print the optional
        ``--info`` summary.
        """
        if self.vargs["db"] is None:
            self.error(u"You must specify the path to your KoboReader.sqlite file.")

        books = self.enumerate_books()
        items = []
        if self.vargs["list"]:
            acc = self._format_books(books)
        else:
            items = self.read_items()
            acc = self._format_items(items)

        self._emit(acc)

        if self.vargs["info"]:
            self.print_stdout(u"")
            self.print_stdout(u"Books with annotations or highlights: %d" % len(books))
            if not self.vargs["list"]:
                # bookmarks without text are neither annotations nor highlights
                notes = [i for i in items if i.kind != Item.BOOKMARK]
                self.print_stdout(u"Annotations and/or highlights: %d" % len(notes))

    def _format_books(self, books):
        """
        Render the ``(id, Book)`` pairs as CSV or as a TAB-separated table.
        """
        rows = [(u"ID", u"TITLE")] + [(i, b.title) for (i, b) in books]
        if self.vargs["csv"]:
            return self.list_to_csv(rows)
        return u"\n".join([(u"%s\t%s" % (a, b)) for (a, b) in rows])

    def _format_items(self, items):
        """
        Render the items as CSV (all kinds) or as human-readable text.
        """
        if self.vargs["csv"]:
            return self.list_to_csv([i.csv_tuple() for i in items])
        template = u"%s\n"
        # a bookmark renders as an empty string: skip it instead of
        # emitting stray blank lines between the real entries
        return u"\n".join([(template % i) for i in items if i.kind != Item.BOOKMARK])

    def _emit(self, acc):
        """
        Write the Unicode string ``acc`` to ``--output`` or to stdout.
        """
        if self.vargs["output"] is not None:
            # the csv module already emits "\r\n" row terminators: disable
            # newline translation, otherwise Windows would write "\r\r\n"
            newline = "" if self.vargs["csv"] else None
            try:
                with io.open(self.vargs["output"], "w", encoding="utf-8", newline=newline) as f:
                    f.write(acc)
            except IOError:
                self.error(u"Unable to write output file. Please check that the path is correct and that you have write permission on it.")
        else:
            try:
                self.print_stdout(acc)
            except UnicodeEncodeError:
                # the terminal cannot represent some characters: degrade them
                # to "?"; decode back to text so that Python 3 does not print
                # the repr of a bytes object
                self.print_stdout(acc.encode("ascii", "replace").decode("ascii"))

    def list_to_csv(self, data):
        """
        Serialize ``data`` (an iterable of row tuples) with the ``csv`` module
        and return the result as a Unicode string.
        """
        if PY2:
            # PY2: the csv module works on byte strings
            output = io.BytesIO()
            text_type = type(u"")
        else:
            # PY3
            output = io.StringIO()
        writer = csv.writer(output)
        for d in data:
            if PY2:
                # encode text cells as UTF-8 (decoded back below); leave
                # numbers and None untouched
                d = tuple([v.encode("utf-8") if isinstance(v, text_type) else v for v in d])
            writer.writerow(d)
        if PY2:
            return output.getvalue().decode("utf-8")
        return output.getvalue()

    def enumerate_books(self):
        """
        Return the list of ``(id, Book)`` pairs, with ids starting at 1,
        in the order given by ``QUERY_BOOKS`` (sorted by title).
        """
        books = [Book(d) for d in self.query(self.QUERY_BOOKS)]
        return list(enumerate(books, start=1))

    def volumeid_from_bookid(self):
        """
        Return the volume id of the book whose ``--list`` ID is ``--bookid``.

        Exits with an error if the value is not an integer in ``1..len(books)``.
        """
        enum = self.enumerate_books()
        bookid = self.vargs["bookid"]
        try:
            index = int(bookid)
        except (TypeError, ValueError):
            index = 0
        # explicit range check: 0 and negative values must not wrap around
        # to the end of the list through negative indexing
        if not (1 <= index <= len(enum)):
            self.error(u"The bookid value must be an integer between 1 and %d" % (len(enum)))
        return enum[index - 1][1].volumeid

    def read_items(self):
        """
        Read all items from the database and apply the ``--book``/``--bookid``
        and ``--highlights-only``/``--annotations-only`` filters.
        """
        if (self.vargs["bookid"] is not None) and (self.vargs["book"] is not None):
            self.error(u"You cannot specify both --book and --bookid.")
        items = [Item(d) for d in self.query(self.QUERY_ITEMS)]
        if len(items) == 0:
            return items
        if self.vargs["bookid"] is not None:
            # resolve the id once: each call queries the database
            volumeid = self.volumeid_from_bookid()
            items = [i for i in items if i.volumeid == volumeid]
        if self.vargs["book"] is not None:
            items = [i for i in items if i.title == self.vargs["book"]]
        if self.vargs["highlights_only"]:
            items = [i for i in items if i.kind == Item.HIGHLIGHT]
        if self.vargs["annotations_only"]:
            items = [i for i in items if i.kind == Item.ANNOTATION]
        return items

    def query(self, query):
        """
        Run ``query`` against the database given on the command line
        and return all the rows; exits with an error on failure.
        """
        db_path = self.vargs["db"]
        # checked up front because sqlite3.connect() would silently
        # create an empty database at a non-existing path
        if not os.path.exists(db_path):
            self.error(u"Unable to read the KoboReader.sqlite file. Please check that the path is correct and that you have read permission on it.")
        try:
            sql_connection = sqlite3.connect(db_path)
            try:
                sql_cursor = sql_connection.cursor()
                sql_cursor.execute(query)
                data = sql_cursor.fetchall()
                sql_cursor.close()
            finally:
                # release the file even if the query fails
                sql_connection.close()
        except Exception as exc:
            self.error(u"Unexpected error reading your KoboReader.sqlite file: %s" % (exc))
        # NOTE the values are Unicode strings (unicode on PY2, str on PY3)
        #      hence data is a list of tuples of Unicode strings
        return data


def main():
    ExportKobo().run()


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPLv3'
__author__ = 'Alberto Pettarin (pettarin gmail.com)'
__copyright__ = '2012-2015 Alberto Pettarin (pettarin gmail.com)'
__version__ = 'v1.0.3'
__date__ = '2015-01-23'
__description__ = 'Extract highlights and annotations from KoboReader.sqlite'


### BEGIN changelog ###
#
# 1.0.3 2015-01-23 Added options -l and -b (suggested by Pierre-Arnaud Rabier)
# 1.0.2 2013-05-11 Better usage text
# 1.0.1 2013-04-30 Initial release
#
### END changelog ###
# NOTE(review): the option that lists titles is parsed below as -t/--titles,
# not -l as the 1.0.3 changelog entry says.

# NOTE: this is the Python 2 variant of the script. Lines following a bare
# "#Python2#" marker are the active Python 2 code; lines starting with
# "#Python3#" carry the commented-out Python 3 equivalent (presumably toggled
# to produce export_Kobo_notes_3.py -- the conversion step is not visible here).

import getopt, os, sqlite3, sys

# kinds of entries, as computed in main()
ANNOTATION="annotation"
BOOKMARK="bookmark"
HIGHLIGHT="highlight"


### BEGIN read_command_line_parameters ###
# read_command_line_parameters(argv)
# read the command line parameters given in argv, and return a suitable dict()
# the dict maps each option as typed (e.g. '-f' or '--file') to its value;
# on a getopt error, print_error() is called, which exits
def read_command_line_parameters(argv):

    try:
        optlist, free = getopt.getopt(argv[1:], 'chtb:f:o:', ['book=', 'file=', 'output=', 'csv', 'help', 'titles'])
    #Python2#
    except getopt.GetoptError, err:
    #Python3#    except getopt.GetoptError as err:
        print_error(str(err))

    return dict(optlist)
### END read_command_line_parameters ###


### BEGIN usage ###
# usage()
# print script usage
# NOTE(review): the "-b, --book" line below lacks the "<title>" placeholder
# that the same line has in export_Kobo_notes_3.py
def usage():
    #Python2#
    e = "python"
    #Python2#
    s = sys.argv[0]
    #Python3#    e = "python3"
    #Python3#    s = sys.argv[0]
    print_("")
    print_("$ %s %s [ARGUMENTS]" % (e, s))
    print_("")
    print_("Arguments:")
    print_(" -b, --book : output annotations only for book <title>")
    print_(" -c, --csv : output CSV values, instead of human-readable strings")
    print_(" -f, --file <file> : <file> is the path to KoboReader.sqlite")
    print_(" -h, --help : print this usage message and exit")
    print_(" -t, --titles : output a list of book titles with annotations")
    print_(" -o, --output <file> : write output to <file>")
    print_("")
    print_("Exit codes:")
    print_("")
    print_(" 0 = no error")
    print_(" 1 = invalid argument(s) error")
    print_(" 2 = file KoboReader.sqlite not found")
    print_(" 4 = provided file is not a valid KoboReader.sqlite db")
    print_(" 8 = output file cannot be written")
    print_("")
    print_("Examples:")
    print_("")
    print_(" 1. Print this usage message")
    print_(" $ %s %s -h" % (e, s))
    print_("")
    print_(" 2. Print annotations and highlit passages in human-readable form")
    print_(" $ %s %s -f KoboReader.sqlite" % (e, s))
    print_("")
    print_(" 3. As above, but output to file output.txt")
    print_(" $ %s %s -f KoboReader.sqlite -o output.txt " % (e, s))
    print_("")
    print_(" 4. As above, but output in CSV form")
    print_(" $ %s %s -c -f KoboReader.sqlite" % (e, s))
    print_("")
    print_(" 5. As above, but output to output.csv")
    print_(" $ %s %s -c -f KoboReader.sqlite -o output.csv" % (e, s))
    print_("")
    print_(" 6. Print the list of book titles with annotations or highlit passages")
    print_(" $ %s %s -t -f KoboReader.sqlite" % (e, s))
    print_("")
    print_(" 7. Print annotations and highlit passages for 'The Art of War'")
    print_(" $ %s %s -f KoboReader.sqlite -b 'The Art of War'" % (e, s))
    print_("")
    print_(" 8. As above, but output to output.csv")
    print_(" $ %s %s -c -f KoboReader.sqlite -b 'The Art of War' -o output.csv" % (e, s))
    print_("")
### END usage ###


### BEGIN print_error ###
# print_error(error, displayusage=True)
# print the given error, call usage, and exit
# optional displayusage to skip usage
# optional exitcode is the process exit status (see "Exit codes" in usage())
def print_error(error, displayusage = True, exitcode = 1):
    sys.stderr.write("[ERROR] " + error + " Aborting.\n")
    if displayusage :
        usage()
    sys.exit(exitcode)
### END print_error ###


### BEGIN print_info ###
# print_info(info, quiet)
# print the given info string
# nothing is printed when quiet is true
def print_info(info, quiet):
    if (not quiet):
        print("[INFO] " + info)
### END print_info ###


### BEGIN print_ ###
# print_(info)
# print the given string
def print_(info):
    print(info)
### END print_ ###


### BEGIN escape ###
# escape(s)
# escape ASCII sequences
# None and empty values become ""; a TAB is replaced rather than escaped
# (print_csv() uses TAB as its field separator)
def escape(s):
    if ((s == None) or (len(s) < 1)):
        return ""

    repl = [
        ["\0", "\\0"],
        ["\a", "\\a"],
        ["\b", "\\b"],
        ["\t", " "],
        ["\n", "\\n"],
        ["\v", "\\v"],
        ["\f", "\\f"],
        ["\r", "\\r"]
    ]
    for r in repl:
        s = s.replace(r[0], r[1])
    return s
### END escape ###


### BEGIN print_titles ###
# print_titles(data)
# data = [ [f_type, booktitle, text, annotation, date_created, date_modified] ]
# print list of titles
# returns (does not print) the distinct titles, sorted, one per line,
# each wrapped in single quotes
def print_titles(data):
    acc = ""
    tit = []
    fil = dict()

    for d in data:
        [f_type, booktitle, text, annotation, date_created, date_modified] = d
        # fil is used as a set of the titles already seen
        if (not booktitle in fil):
            tit.append(booktitle)
            fil[booktitle] = True

    tit = sorted(tit)
    for t in tit:
        acc += "'%s'\n" % (t)

    return acc.strip()
### END print_titles ###

### BEGIN print_hr ###
# print_hr(data)
# data = [ [f_type, booktitle, text, annotation, date_created, date_modified] ]
# print human-readable output
# returns (does not print) the text; entries of kind BOOKMARK produce no output
def print_hr(data):

    acc = ""

    for d in data:
        [f_type, booktitle, text, annotation, date_created, date_modified] = d

        if (f_type == ANNOTATION):
            acc += "Type: %s\n" % (f_type)
            acc += "Title: %s\n" % (booktitle)
            acc += "Reference text: %s\n" % (text)
            acc += "Annotation: %s\n" % (annotation)
            acc += "Date created: %s\n" % (date_created)
            acc += "Date modified: %s\n" % (date_modified)
            acc += "\n"

        if (f_type == HIGHLIGHT):
            acc += "Type: %s\n" % (f_type)
            acc += "Title: %s\n" % (booktitle)
            acc += "Reference text: %s\n" % (text)
            acc += "Date created: %s\n" % (date_created)
            acc += "\n"

    return acc.strip()
### END print_hr ###


### BEGIN print_csv ###
# print_csv(data)
# data = [ [f_type, booktitle, text, annotation, date_created, date_modified] ]
# print csv output
# returns (does not print) a header row followed by one row per entry;
# note that the fields are separated by TAB characters, not commas
def print_csv(data):

    # add header
    data2 = [ ["Type", "Book Title", "Reference Text", "Annotation", "Date Created", "Date Modified"] ] + data

    acc = ""

    for d in data2:
        acc += "%s\t%s\t%s\t%s\t%s\t%s\n" % (tuple(d))

    return acc.strip()
### END print_csv ###

### BEGIN main ###
# main()
# parse the options, read the Bookmark/content rows from the given db,
# classify and filter them, and write the result to stdout or to a file;
# always terminates through sys.exit() (see "Exit codes" in usage())
def main():
    # read command line parameters
    options = read_command_line_parameters(sys.argv)

    # if help required, print usage and exit
    if (('-h' in options) or ('--help' in options)):
        usage()
        sys.exit(0)

    # look for dbFile
    dbFile = None
    if ('-f' in options):
        dbFile = options['-f']
    if ('--file' in options):
        dbFile = options['--file']

    if (dbFile == None):
        print_error("You should specify the path to the KoboReader.sqlite file.", exitcode=1)
    if ('-f' in options) and ('--file' in options):
        print_error("You cannot specify both '%s' and '%s' parameters." % ('-f', '--file'), exitcode=1)
    if (not os.path.isfile(dbFile)):
        print_error("File %s not found." % (dbFile), exitcode=2)

    # look for -b or --book switch
    # (if both forms are given, the long one wins)
    keepOnlyBookWithTitle = None
    if ('-b' in options):
        keepOnlyBookWithTitle = options['-b']
    if ('--book' in options):
        keepOnlyBookWithTitle = options['--book']

    # look for CSV switch
    outputCSV = False
    if ('-c' in options) or ('--csv' in options):
        outputCSV = True

    # look for outFile
    # (if both forms are given, the long one wins)
    outFile = None
    if ('-o' in options):
        outFile = options['-o']
    if ('--output' in options):
        outFile = options['--output']

    # look for -t or --titles switch
    onlyListTitles = False
    if ('-t' in options) or ('--titles' in options):
        onlyListTitles = True

    # good, we can try opening the given file
    # NOTE(review): the bare except reports any failure in this block
    # as "not a valid KoboReader.sqlite db"
    try:
        sql_connection = sqlite3.connect(dbFile)
        sql_cursor = sql_connection.cursor()
        sql_cursor.execute('select Bookmark.ContentID,Bookmark.Text,Bookmark.Annotation,Bookmark.ExtraAnnotationData,Bookmark.DateCreated,Bookmark.DateModified,content.BookTitle,content.Title from Bookmark, content WHERE Bookmark.ContentID = content.ContentID;')
        data = sql_cursor.fetchall()
        sql_cursor.close()
        sql_connection.close()
    except:
        print_error("File %s is not a valid KoboReader.sqlite db." % (dbFile), exitcode=4)

    # get data
    # each row t has the eight columns of the select above, in that order
    acc = []
    for t in data:
        content_id = escape(t[0])
        text = escape(t[1])
        annotation = escape(t[2])
        extra_annotation_data = escape(t[3])
        date_created = escape(t[4])
        date_modified = escape(t[5])
        booktitle = escape(t[6])
        title = escape(t[7])

        # text and annotation -> annotation; text only -> highlight;
        # no text -> bookmark
        f_type = BOOKMARK
        if ((text != "") and (annotation != "")):
            f_type = ANNOTATION
        elif (text != ""):
            f_type = HIGHLIGHT

        # filter by book title
        if (keepOnlyBookWithTitle == None) or (keepOnlyBookWithTitle == booktitle):
            acc.append([f_type, booktitle, text, annotation, date_created, date_modified])

    # output titles to stdout
    # (the heading is always printed to stdout, even when -o is given)
    if (onlyListTitles):
        print_("List of book titles with annotations (delimited by a ' character)")
        print_("")
        output = print_titles(acc)
    else:
        # output stuff to stdout
        if (outputCSV):
            output = print_csv(acc)
        else:
            output = print_hr(acc)

    # output to stdout or file?
    if (outFile == None):
        # stdout
        print_(output)
    else:
        # file
        try:
            f = open(outFile, 'w')
            f.write(output)
            f.close()
        except:
            print_error("File %s cannot be written." % (outFile), exitcode=8)

    # return proper exit code
    sys.exit(0)
### END main ###



if __name__ == '__main__':
    # TODO let the user specify file encoding instead
    #Python2#
    reload(sys)
    #Python2#
    sys.setdefaultencoding("utf-8")
    main()
strings") + print_(" -f, --file <file> : <file> is the path to KoboReader.sqlite") + print_(" -h, --help : print this usage message and exit") + print_(" -t, --titles : output a list of book titles with annotations") + print_(" -o, --output <file> : write output to <file>") + print_("") + print_("Exit codes:") + print_("") + print_(" 0 = no error") + print_(" 1 = invalid argument(s) error") + print_(" 2 = file KoboReader.sqlite not found") + print_(" 4 = provided file is not a valid KoboReader.sqlite db") + print_(" 8 = output file cannot be written") + print_("") + print_("Examples:") + print_("") + print_(" 1. Print this usage message") + print_(" $ %s %s -h" % (e, s)) + print_("") + print_(" 2. Print annotations and highlit passages in human-readable form") + print_(" $ %s %s -f KoboReader.sqlite" % (e, s)) + print_("") + print_(" 3. As above, but output to file output.txt") + print_(" $ %s %s -f KoboReader.sqlite -o output.txt " % (e, s)) + print_("") + print_(" 4. As above, but output in CSV form") + print_(" $ %s %s -c -f KoboReader.sqlite" % (e, s)) + print_("") + print_(" 5. As above, but output to output.csv") + print_(" $ %s %s -c -f KoboReader.sqlite -o output.csv" % (e, s)) + print_("") + print_(" 6. Print the list of book titles with annotations or highlit passages") + print_(" $ %s %s -t -f KoboReader.sqlite" % (e, s)) + print_("") + print_(" 7. Print annotations and highlit passages for 'The Art of War'") + print_(" $ %s %s -f KoboReader.sqlite -b 'The Art of War'" % (e, s)) + print_("") + print_(" 8. 
### BEGIN print_error ###
# print_error(error, displayusage=True)
# print the given error, call usage, and exit
# optional displayusage to skip usage
# optional exitcode is the process exit status
def print_error(error, displayusage = True, exitcode = 1):
    sys.stderr.write("[ERROR] " + error + " Aborting.\n")
    if displayusage:
        usage()
    sys.exit(exitcode)
### END print_error ###


### BEGIN print_info ###
# print_info(info, quiet)
# print the given info string, unless quiet is true
def print_info(info, quiet):
    if not quiet:
        print("[INFO] " + info)
### END print_info ###


### BEGIN print_ ###
# print_(info)
# print the given string
def print_(info):
    print(info)
### END print_ ###


### BEGIN escape ###
# escape(s)
# escape ASCII sequences
# None and empty values become ""; a TAB is replaced rather than escaped
# (print_csv() uses TAB as its field separator)
def escape(s):
    if (s is None) or (len(s) < 1):
        return ""

    repl = (
        ("\0", "\\0"),
        ("\a", "\\a"),
        ("\b", "\\b"),
        ("\t", " "),
        ("\n", "\\n"),
        ("\v", "\\v"),
        ("\f", "\\f"),
        ("\r", "\\r"),
    )
    for raw, escaped in repl:
        s = s.replace(raw, escaped)
    return s
### END escape ###


### BEGIN print_titles ###
# print_titles(data)
# data = [ [f_type, booktitle, text, annotation, date_created, date_modified] ]
# return the distinct book titles, sorted, one per line,
# each wrapped in single quotes
def print_titles(data):
    titles = sorted(set(d[1] for d in data))
    return "\n".join("'%s'" % (t) for t in titles)
### END print_titles ###

### BEGIN print_hr ###
# print_hr(data)
# data = [ [f_type, booktitle, text, annotation, date_created, date_modified] ]
# return the human-readable output;
# entries that are neither annotations nor highlights produce no output
def print_hr(data):
    lines = []

    for d in data:
        [f_type, booktitle, text, annotation, date_created, date_modified] = d

        if f_type == ANNOTATION:
            lines.append("Type: %s" % (f_type))
            lines.append("Title: %s" % (booktitle))
            lines.append("Reference text: %s" % (text))
            lines.append("Annotation: %s" % (annotation))
            lines.append("Date created: %s" % (date_created))
            lines.append("Date modified: %s" % (date_modified))
            lines.append("")

        if f_type == HIGHLIGHT:
            lines.append("Type: %s" % (f_type))
            lines.append("Title: %s" % (booktitle))
            lines.append("Reference text: %s" % (text))
            lines.append("Date created: %s" % (date_created))
            lines.append("")

    # every line, including the blank separator, is newline-terminated
    # before the final strip()
    return "".join(line + "\n" for line in lines).strip()
### END print_hr ###


### BEGIN print_csv ###
# print_csv(data)
# data = [ [f_type, booktitle, text, annotation, date_created, date_modified] ]
# return a header row followed by one row per entry;
# note that the fields are separated by TAB characters, not commas
def print_csv(data):
    # add header
    header = ["Type", "Book Title", "Reference Text", "Annotation", "Date Created", "Date Modified"]
    rows = [header] + list(data)

    return "\n".join("%s\t%s\t%s\t%s\t%s\t%s" % (tuple(d)) for d in rows).strip()
### END print_csv ###
% (dbFile), exitcode=2) + + # look for -b or --book switch + keepOnlyBookWithTitle = None + if ('-b' in options): + keepOnlyBookWithTitle = options['-b'] + if ('--book' in options): + keepOnlyBookWithTitle = options['--book'] + + # look for CSV switch + outputCSV = False + if ('-c' in options) or ('--csv' in options): + outputCSV = True + + # look for outFile + outFile = None + if ('-o' in options): + outFile = options['-o'] + if ('--output' in options): + outFile = options['--output'] + + # look for -t or --titles switch + onlyListTitles = False + if ('-t' in options) or ('--titles' in options): + onlyListTitles = True + + # good, we can try opening the given file + try: + sql_connection = sqlite3.connect(dbFile) + sql_cursor = sql_connection.cursor() + sql_cursor.execute('select Bookmark.ContentID,Bookmark.Text,Bookmark.Annotation,Bookmark.ExtraAnnotationData,Bookmark.DateCreated,Bookmark.DateModified,content.BookTitle,content.Title from Bookmark, content WHERE Bookmark.ContentID = content.ContentID;') + data = sql_cursor.fetchall() + sql_cursor.close() + sql_connection.close() + except: + print_error("File %s is not a valid KoboReader.sqlite db." 
% (dbFile), exitcode=4) + + # get data + acc = [] + for t in data: + content_id = escape(t[0]) + text = escape(t[1]) + annotation = escape(t[2]) + extra_annotation_data = escape(t[3]) + date_created = escape(t[4]) + date_modified = escape(t[5]) + booktitle = escape(t[6]) + title = escape(t[7]) + + f_type = BOOKMARK + if ((text != "") and (annotation != "")): + f_type = ANNOTATION + elif (text != ""): + f_type = HIGHLIGHT + + # filter by book title + if (keepOnlyBookWithTitle == None) or (keepOnlyBookWithTitle == booktitle): + acc.append([f_type, booktitle, text, annotation, date_created, date_modified]) + + # output titles to stdout + if (onlyListTitles): + print_("List of book titles with annotations (delimited by a ' character)") + print_("") + output = print_titles(acc) + else: + # output stuff to stdout + if (outputCSV): + output = print_csv(acc) + else: + output = print_hr(acc) + + # output to stdout or file? + if (outFile == None): + # stdout + print_(output) + else: + # file + try: + f = open(outFile, 'w') + f.write(output) + f.close() + except: + print_error("File %s cannot be written." % (outFile), exitcode=8) + + # return proper exit code + sys.exit(0) +### END main ### + + + +if __name__ == '__main__': + # TODO let the user specify file encoding instead + #Python2# reload(sys) + #Python2# sys.setdefaultencoding("utf-8") + main() + +