diff --git a/acdcli/plugins/split.py b/acdcli/plugins/split.py
new file mode 100644
index 0000000..66aa9f3
--- /dev/null
+++ b/acdcli/plugins/split.py
@@ -0,0 +1,372 @@
+"""
+Plugin to upload and download big files in chunks.
+
+Chunks are named by their hexadecimal index ("0", "1", ..., "a", ...).
+An encrypted, compressed chunk can be recovered by hand, e.g.:
+
+openssl aes-256-cbc -d -md md5 -in 0 > 0.xz
+7za x 0.xz
+
+(-md md5 is required with OpenSSL >= 1.1.0, since the key derivation
+below is MD5-based.)
+"""
+
+import io
+import os
+import sys
+import json
+import lzma
+import hashlib
+import logging
+
+from Crypto import Random
+from Crypto.Cipher import AES
+
+from . import *
+
+logger = logging.getLogger(__name__)
+
+# compression settings: preset 9 with the extreme flag
+# (PRESET_EXTREME alone would select preset 0)
+lzma_filters = [{"id": lzma.FILTER_LZMA2, "preset": 9 | lzma.PRESET_EXTREME}]
+
+
+def derive_key_and_iv(password, salt, key_length, iv_length):
+    """
+    Helper function to derive the key and IV from a password and salt,
+    compatible with OpenSSL's MD5-based EVP_BytesToKey
+    """
+    d = d_i = b''
+    while len(d) < key_length + iv_length:
+        d_i = hashlib.md5(d_i + str.encode(password) + salt).digest()
+        d += d_i
+    return d[:key_length], d[key_length:key_length + iv_length]
+
+
+def prepare_chunk(chunk, compress=False, encrypt_password=None):
+    """
+    Compresses and encrypts a chunk of data
+    """
+    if compress:
+        # save the original chunk size for comparison
+        # with the compressed size
+        chunk_length_original = len(chunk)
+
+        # compress
+        chunk = lzma.compress(chunk, filters=lzma_filters)
+
+        logger.debug("Compressed %d bytes to %d, saved %02.2f%%",
+                     chunk_length_original,
+                     len(chunk),
+                     100.0 * ((chunk_length_original - len(chunk)) / chunk_length_original))
+
+    if encrypt_password:
+        # create a new random salt for each chunk
+        salt = Random.new().read(8)
+        # derive key and IV from the password and the salt
+        key, iv = derive_key_and_iv(encrypt_password, salt, 32, 16)
+        # header of an OpenSSL-encrypted file:
+        # the string "Salted__" followed by the 8-byte salt
+        cipher_header = b'Salted__' + salt
+
+        # create the cipher, AES-256 in CBC mode
+        cipher = AES.new(key, AES.MODE_CBC, iv)
+        # we need to pad to a multiple of the block size
+        # (a full block of padding if the chunk is already aligned)
+        padding_length = AES.block_size - len(chunk) % AES.block_size
+        # append the padding to the chunk, PKCS#7-style,
+        # for OpenSSL compatibility and to recover the original
+        # size of the chunk after decryption
+        chunk += str.encode(padding_length * chr(padding_length))
+        # finally prepend the header to the encrypted chunk
+        chunk = cipher_header + cipher.encrypt(chunk)
+
+    # return a fake stream
+    return io.BytesIO(chunk)
+
+
+def unprepare_chunk(chunk, decrypt_password=None):
+    """
+    Helper function that decrypts and decompresses
+    a chunk of data
+    """
+
+    if b'Salted__' == chunk[:8]:
+        # the chunk starts with "Salted__",
+        # so it is encrypted
+        if not decrypt_password:
+            # no dice without a password
+            logger.critical("Found encrypted chunk but no password specified")
+            # we can't recover from that
+            sys.exit(-1)
+
+        # read the salt, the 8 bytes following "Salted__"
+        salt = chunk[8:16]
+        # again derive key and IV
+        key, iv = derive_key_and_iv(decrypt_password, salt, 32, 16)
+        # and create the AES-256 cipher
+        cipher = AES.new(key, AES.MODE_CBC, iv)
+        # decrypt
+        chunk = cipher.decrypt(bytes(chunk[16:]))
+        # get the length of the padding
+        padding_length = chunk[-1]
+        # cut the padding bytes off
+        chunk = chunk[:-padding_length]
+
+    try:
+        # decompress
+        chunk = lzma.decompress(chunk)
+    except lzma.LZMAError:
+        # the chunk was not compressed, leave it as-is
+        pass
+
+    return chunk
+
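+
+# A minimal round-trip sketch for prepare_chunk()/unprepare_chunk() above
+# (illustrative only, not executed by the plugin; 'secret' is a made-up
+# example password):
+#
+#     data = b'some chunk of data'
+#     stream = prepare_chunk(data, compress=True, encrypt_password='secret')
+#     assert unprepare_chunk(stream.getvalue(), decrypt_password='secret') == data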
+
+
+class SplitPlugin(Plugin):
+    MIN_VERSION = '0.3.1'
+
+    @classmethod
+    def attach(cls, subparsers: argparse.ArgumentParser, log: list, **kwargs):
+        """ Attaches this plugin to the top-level argparse subparser group
+        :param subparsers: the action subparser group
+        :param log: a list to put initialization log messages in
+        """
+        arg_parser = subparsers.add_parser('split-upload', add_help=False)
+        arg_parser.add_argument('--lzma-compress', '-lc', action='store_true')
+        arg_parser.add_argument('--password', '-p', type=str)
+        arg_parser.add_argument('localpath')
+        arg_parser.add_argument('remotepath')
+        arg_parser.add_argument('splitsize', type=int)
+        arg_parser.set_defaults(func=cls.split_upload)
+
+        arg_parser = subparsers.add_parser('split-download', add_help=False)
+        arg_parser.add_argument('--password', '-p', type=str)
+        arg_parser.add_argument('remotepath')
+        arg_parser.add_argument('localpath')
+        arg_parser.add_argument('splitsize', type=int)
+        arg_parser.set_defaults(func=cls.split_download)
+
+        log.append(str(cls) + ' attached.')
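+
+    # Example invocations (a sketch; the flag names are as registered above,
+    # while the acd_cli entry point and the 100 MiB chunk size are assumptions):
+    #
+    #     acd_cli split-upload --lzma-compress -p secret ./disk.img /backup/disk.img 104857600
+    #     acd_cli split-download -p secret /backup/disk.img ./disk.img 104857600
+    #
+    # splitsize is the chunk size in bytes and must match between upload and
+    # download, since both walk the file in splitsize-sized steps.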
+
+    @classmethod
+    def split_upload(cls, args: argparse.Namespace) -> int:
+        """
+        Upload method
+        """
+
+        # extract the important values for easier access
+        local_path = os.path.abspath(args.localpath)
+        remote_path = args.remotepath
+        chunk_size = args.splitsize
+        client = args.acd_client
+        cache = args.cache
+
+        # does the local file exist at all?
+        if not os.path.exists(local_path):
+            # nope, we are done
+            logger.critical("File %s does not exist", local_path)
+            return 1
+
+        # does the remote path already exist?
+        remote_node = cache.resolve(remote_path)
+        if not remote_node:
+            # it doesn't, but we need at least the parent node
+            # to create a directory in it
+            parent_name = os.path.dirname(remote_path)
+            parent_node = cache.resolve(parent_name)
+            if not parent_node:
+                # no parent, no go
+                logger.critical("Parent %s for %s does not exist", parent_name, remote_path)
+                return 1
+
+            # create a directory we can put the chunks in
+            remote_node = client.create_folder(os.path.basename(remote_path), parent_node.id)
+            cache.insert_node(remote_node)
+            remote_node = cache.resolve(remote_path)
+
+        # sanity check that the remote node is a directory
+        if not remote_node.is_folder:
+            logger.critical("Remote path %s is not a directory", remote_path)
+            return 1
+
+        # default for the chunks data
+        chunks_data = {"chunks": []}
+        chunks_node = cache.resolve(remote_path + "/chunks")
+        if chunks_node:
+            # load from ACD
+            chunks_data = json.loads(unprepare_chunk(client.download_chunk(chunks_node.id,
+                                                                           0,
+                                                                           chunks_node.size),
+                                                     decrypt_password=args.password).decode("utf8"))
+        else:
+            # the chunks file doesn't exist yet, create it with the default values
+            result = client.upload_stream(prepare_chunk(json.dumps(chunks_data).encode("utf8"),
+                                                        args.lzma_compress,
+                                                        args.password),
+                                          "chunks",
+                                          remote_node.id)
+            cache.insert_node(result)
+            chunks_node = cache.resolve(remote_path + "/chunks")
+
+        # okay, time to get started
+        with open(local_path, "rb") as file_stream:
+            chunk_id = 0
+            while True:
+                # read the local file chunk by chunk
+                chunk = file_stream.read(chunk_size)
+                if not chunk:
+                    # nothing left to read, we are done
+                    break
+
+                # calculate the hash
+                md5_digest = hashlib.md5(chunk).hexdigest()
+
+                upload_chunk = True
+                # the name of the current chunk
+                chunk_name = "%x" % chunk_id
+
+                # does it already exist on ACD?
+                remote_child_node = cache.get_child(remote_node.id, chunk_name)
+                if remote_child_node:
+                    # yes
+
+                    # if we have an MD5 hash in the chunks file and the user has requested
+                    # compression or encryption, use the cached MD5 for the comparison
+                    if len(chunks_data["chunks"]) > chunk_id and (args.lzma_compress or args.password):
+                        upload_chunk = md5_digest != chunks_data["chunks"][chunk_id]
+                    else:
+                        # otherwise compare against the MD5 from ACD
+                        upload_chunk = md5_digest != remote_child_node.md5
+
+                    if upload_chunk:
+                        logger.info("%s: MD5 mismatch (%s / %s)", chunk_name, remote_child_node.md5, md5_digest)
+
+                if upload_chunk:
+                    # so, we need to upload something
+
+                    # update the chunks data
+                    if len(chunks_data["chunks"]) > chunk_id:
+                        chunks_data["chunks"][chunk_id] = md5_digest
+                    else:
+                        chunks_data["chunks"].append(md5_digest)
+
+                    # prepare the chunk for upload (maybe compress and encrypt)
+                    chunk = prepare_chunk(chunk, compress=args.lzma_compress, encrypt_password=args.password)
+
+                    if remote_child_node:
+                        # if it exists, overwrite
+                        logger.info("%s: Overwriting %d bytes to node %s",
+                                    chunk_name, len(chunk.getvalue()), remote_child_node.id)
+                        remote_child_node = client.overwrite_stream(chunk, remote_child_node.id)
+                    else:
+                        # if not, create a new file
+                        logger.info("%s: Uploading %d bytes", chunk_name, len(chunk.getvalue()))
+                        remote_child_node = client.upload_stream(chunk, chunk_name, remote_node.id)
+                    cache.insert_node(remote_child_node)
+
+                    # always update the chunks file on ACD
+                    result = client.overwrite_stream(prepare_chunk(json.dumps(chunks_data).encode("utf8"),
+                                                                   compress=args.lzma_compress,
+                                                                   encrypt_password=args.password),
+                                                     chunks_node.id)
+                    cache.insert_node(result)
+                else:
+                    logger.info("%s: Keeping %d bytes", chunk_name, len(chunk))
+
+                # on to the next chunk
+                chunk_id += 1
+
+        return 0
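+
+    # The "chunks" file maintained above is itself passed through
+    # prepare_chunk(); its plaintext is a JSON index with one MD5 hex digest
+    # per chunk, in chunk order, e.g. (digests shortened for illustration):
+    #
+    #     {"chunks": ["9e107d9d372bb682...", "e4d909c290d0fb1c..."]}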
+
+    @classmethod
+    def split_download(cls, args: argparse.Namespace) -> int:
+        """
+        Download method
+        """
+
+        # extract the important values for easier access
+        remote_path = args.remotepath
+        local_path = os.path.abspath(args.localpath)
+        chunk_size = args.splitsize
+        client = args.acd_client
+        cache = args.cache
+
+        # get the remote node
+        remote_node = cache.resolve(remote_path)
+        if not remote_node:
+            # it does not exist, we are done
+            logger.critical("Remote %s does not exist", remote_path)
+            return 1
+
+        # is it at least a folder?
+        if not remote_node.is_folder:
+            logger.critical("Remote path %s is not a directory", remote_path)
+            return 1
+
+        # default for the chunks data
+        chunks_data = {"chunks": []}
+        chunks_node = cache.resolve(remote_path + "/chunks")
+        if chunks_node:
+            # load from ACD
+            chunks_data = json.loads(unprepare_chunk(client.download_chunk(chunks_node.id,
+                                                                           0,
+                                                                           chunks_node.size),
+                                                     decrypt_password=args.password).decode("utf8"))
+
+        # if the file already exists, open it so we can also read from it
+        open_mode = "rb+"
+        if not os.path.exists(local_path):
+            # if there is no local file yet, we just need to write
+            open_mode = "wb+"
+
+        # okay, time to get started
+        with open(local_path, open_mode) as file_stream:
+            chunk_id = 0
+            while True:
+                # the name of the current chunk
+                chunk_name = "%x" % chunk_id
+                chunk_node = cache.get_child(remote_node.id, chunk_name)
+                if not chunk_node:
+                    # no such chunk file on ACD, we are done
+                    logger.debug("Chunk %s not found, stopping", chunk_name)
+                    break
+
+                overwrite_chunk = False
+                # save the current position so we can jump back
+                # when we need to overwrite this chunk
+                current_position = file_stream.tell()
+                # read the chunk
+                current_chunk = file_stream.read(chunk_size)
+                # get the hash
+                current_md5 = hashlib.md5(current_chunk).hexdigest()
+
+                # do we have an MD5 from the chunks file?
+                if len(chunks_data["chunks"]) > chunk_id:
+                    # yes, compare the local hash against the one
+                    # from the chunks file
+                    overwrite_chunk = current_md5 != chunks_data["chunks"][chunk_id]
+                else:
+                    # no chunks data
+
+                    # overwrite if the file sizes don't match
+                    if len(current_chunk) != chunk_node.size:
+                        logger.debug("%s: Size mismatch %d / %d", chunk_name, len(current_chunk), chunk_node.size)
+                        overwrite_chunk = True
+                    # or if the hash from ACD is different
+                    elif current_md5 != chunk_node.md5:
+                        logger.debug("%s: MD5 mismatch %s / %s", chunk_name, current_md5, chunk_node.md5)
+                        overwrite_chunk = True
+
+                if not overwrite_chunk:
+                    logger.info("%s: Keeping chunk with %d bytes", chunk_name, chunk_node.size)
+                else:
+                    # we need to overwrite our local file chunk
+                    logger.info("Downloading chunk %s with %d bytes", chunk_name, chunk_node.size)
+                    # download the chunk from ACD
+                    chunk = client.download_chunk(chunk_node.id, 0, chunk_node.size)
+
+                    # jump back to the start of the chunk
+                    file_stream.seek(current_position)
+                    # and overwrite it
+                    file_stream.write(unprepare_chunk(chunk, decrypt_password=args.password))
+
+                # on to the next chunk
+                chunk_id += 1
+
+        return 0
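+
+# Reassembling already-downloaded raw chunk files without this plugin
+# (a minimal sketch; assumes the files "0", "1", ... were fetched as-is,
+# chunk_dir and 'secret' are made-up stand-ins for the real directory and
+# password):
+#
+#     import os
+#     from acdcli.plugins.split import unprepare_chunk
+#
+#     chunk_dir = '.'  # wherever the chunk files live
+#     chunk_id = 0
+#     with open('restored.bin', 'wb') as out:
+#         while True:
+#             name = os.path.join(chunk_dir, '%x' % chunk_id)
+#             if not os.path.exists(name):
+#                 break
+#             with open(name, 'rb') as f:
+#                 out.write(unprepare_chunk(f.read(), decrypt_password='secret'))
+#             chunk_id += 1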