diff --git a/Dockerfile b/Dockerfile index 9df7e28..1416fff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,45 @@ +###################################################################### +# Establish a common builder image for all golang-based images +FROM docker.io/golang:1.21 as golang-builder +USER root +WORKDIR /workspace +# We don't vendor modules. Enforce that behavior +ENV GOFLAGS=-mod=readonly +ENV GO111MODULE=on +ARG TARGETOS +ARG TARGETARCH +ENV GOOS=${TARGETOS:-linux} +ENV GOARCH=${TARGETARCH} + +###################################################################### +# Build block binary +FROM golang-builder AS blockrsync-builder + +# Copy the Go Modules manifests & download dependencies +COPY go.mod go.mod +COPY go.sum go.sum + +# Copy the go source +COPY cmd/ cmd/ +COPY ./pkg/. pkg/ +RUN go mod download + +# Build +RUN go build -o blockrsync ./cmd/blockrsync/main.go +RUN go build -o proxy ./cmd/proxy/main.go + +###################################################################### +# Final container FROM registry.access.redhat.com/ubi8/ubi-minimal:latest -RUN echo -ne "[centos-8-baseos]\nname = CentOS 8 (RPMs) - BaseOS\nbaseurl = http://mirror.centos.org/centos/8-stream/BaseOS/x86_64/os/\nenabled = 1\ngpgcheck = 0" > /etc/yum.repos.d/centos.repo +RUN microdnf update -y RUN microdnf -y install openssh-server stunnel rsync nmap && microdnf clean all COPY sshd_config /etc/ssh/sshd_config COPY stunnel.conf /etc/stunnel/stunnel.conf USER 65534:65534 + +WORKDIR / + +##### blockrsync +COPY --from=blockrsync-builder /workspace/blockrsync /blockrsync +COPY --from=blockrsync-builder /workspace/proxy /proxy + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/cmd/blockrsync/main.go b/cmd/blockrsync/main.go new file mode 100644 index 0000000..e73d1bd --- /dev/null +++ b/cmd/blockrsync/main.go @@ -0,0 +1,77 @@ +package main + +import ( + "flag" + "fmt" + "os" + + "go.uber.org/zap/zapcore" + + "github.com/spf13/pflag" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + "github.com/awels/blockrsync/pkg/blockrsync" +) + +func usage() { + _, _ = fmt.Fprintf(os.Stderr, "Usage: %s [devicepath] [flags]\n", os.Args[0]) + flag.PrintDefaults() + os.Exit(2) +} + +func main() { + var ( + sourceMode = flag.Bool("source", false, "Source mode") + targetMode = flag.Bool("target", false, "Target mode") + targetAddress = flag.String("target-address", "", "address of the server, source only") + port = flag.Int("port", 8000, "port to listen on or connect to") + ) + opts := blockrsync.BlockRsyncOptions{} + + flag.BoolVar(&opts.Preallocation, "preallocate", false, "Preallocate empty file space") + flag.IntVar(&opts.BlockSize, "block-size", 65536, "block size, must be > 0 and a multiple of 4096") + + zapopts := zap.Options{ + Development: true, + TimeEncoder: zapcore.ISO8601TimeEncoder, + DestWriter: os.Stdout, + } + zapopts.BindFlags(flag.CommandLine) + + // Import flags into pflag so they can be bound by viper + pflag.CommandLine.AddGoFlagSet(flag.CommandLine) + + pflag.Parse() + logger := zap.New(zap.UseFlagOptions(&zapopts)) + + if opts.BlockSize <= 0 || opts.BlockSize%4096 != 0 { + fmt.Fprintf(os.Stderr, "block-size must be > 0 and a multiple of 4096\n") + usage() + } + if *sourceMode && !*targetMode { + if targetAddress == nil || *targetAddress == "" { + fmt.Fprintf(os.Stderr, "target-address must be specified with source flag\n") + usage() + os.Exit(1) + } + blockrsyncClient := blockrsync.NewBlockrsyncClient(os.Args[1], *targetAddress, *port, &opts, logger) + if err := blockrsyncClient.ConnectToTarget(); err != nil { + logger.Error(err, "Unable to connect to target", "source file", os.Args[1], "target address", 
*targetAddress) + // time.Sleep(5 * time.Minute) + os.Exit(1) + } + } else if *targetMode && !*sourceMode { + blockrsyncServer := blockrsync.NewBlockrsyncServer(os.Args[1], *port, &opts, logger) + if err := blockrsyncServer.StartServer(); err != nil { + logger.Error(err, "Unable to start server to write to file", "target file", os.Args[1]) + // time.Sleep(5 * time.Minute) + os.Exit(1) + } + } else { + fmt.Fprintf(os.Stderr, "Either source or target must be defined\n") + usage() + os.Exit(1) + } + // time.Sleep(5 * time.Minute) + logger.Info("Successfully completed sync") +} diff --git a/cmd/proxy/main.go b/cmd/proxy/main.go new file mode 100644 index 0000000..f84fae1 --- /dev/null +++ b/cmd/proxy/main.go @@ -0,0 +1,106 @@ +package main + +import ( + "flag" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/spf13/pflag" + "go.uber.org/zap/zapcore" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + "github.com/awels/blockrsync/pkg/proxy" +) + +type arrayFlags []string + +func (i *arrayFlags) String() string { + return strings.Join(*i, ",") +} + +func (i *arrayFlags) Set(value string) error { + *i = append(*i, value) + return nil +} + +func main() { + var ( + sourceMode = flag.Bool("source", false, "Source mode") + targetMode = flag.Bool("target", false, "Target mode") + targetAddress = flag.String("target-address", "", "address of the server, source only") + controlFile = flag.String("control-file", "", "name and path to file to write when finished") + listenPort = flag.Int("listen-port", 9080, "port to listen on") + targetPort = flag.Int("target-port", 9000, "target port to connect to") + blockrsyncPath = flag.String("blockrsync-path", "/blockrsync", "path to blockrsync binary") + blockSize = flag.Int("block-size", 65536, "block size, must be > 0 and a multiple of 4096") + ) + + var identifiers arrayFlags + + flag.Var(&identifiers, "identifier", "identifier of the file, multiple allowed") + + zapopts := zap.Options{ + Development: true, + TimeEncoder: 
zapcore.ISO8601TimeEncoder, + DestWriter: os.Stdout, + } + zapopts.BindFlags(flag.CommandLine) + + // Import flags into pflag so they can be bound by viper + pflag.CommandLine.AddGoFlagSet(flag.CommandLine) + + pflag.Parse() + logger := zap.New(zap.UseFlagOptions(&zapopts)) + + if controlFile == nil || *controlFile == "" { + fmt.Fprintf(os.Stderr, "control-file must be specified\n") + os.Exit(1) + } + defer func() { + logger.Info("Writing control file", "file", *controlFile) + if err := createControlFile(*controlFile); err != nil { + logger.Error(err, "Unable to create control file") + } + }() + + if *sourceMode && !*targetMode { + if targetAddress == nil || *targetAddress == "" { + fmt.Fprintf(os.Stderr, "target-address must be specified with source flag\n") + os.Exit(1) + } + if len(identifiers) > 1 || len(identifiers) == 0 { + fmt.Fprintf(os.Stderr, "Only one identifier must be specified in source mode\n") + os.Exit(1) + } + client := proxy.NewProxyClient(*listenPort, *targetPort, *targetAddress, logger) + + if err := client.ConnectToTarget(identifiers[0]); err != nil { + logger.Error(err, "Unable to connect to target", "identifier", identifiers[0], "target address", *targetAddress) + os.Exit(1) + } + } else if *targetMode && !*sourceMode { + if len(identifiers) == 0 { + fmt.Fprintf(os.Stderr, "At least one identifier must be specified in target mode\n") + os.Exit(1) + } + server := proxy.NewProxyServer(*blockrsyncPath, *blockSize, *listenPort, identifiers, logger) + + if err := server.StartServer(); err != nil { + logger.Error(err, "Unable to start server") + os.Exit(1) + } + } else { + fmt.Fprintf(os.Stderr, "Must specify source or target, but not both\n") + os.Exit(1) + } +} + +func createControlFile(fileName string) error { + if err := os.MkdirAll(filepath.Dir(fileName), 0755); err != nil { + return err + } + _, err := os.Create(fileName) + return err +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..db01dcc --- /dev/null +++ b/go.mod @@ 
-0,0 +1,39 @@ +module github.com/awels/blockrsync + +go 1.21.6 + +require ( + github.com/go-logr/logr v1.4.1 + github.com/golang/snappy v0.0.4 + github.com/onsi/ginkgo/v2 v2.14.0 + github.com/onsi/gomega v1.30.0 + github.com/spf13/pflag v1.0.5 + go.uber.org/zap v1.27.0 + golang.org/x/crypto v0.21.0 + sigs.k8s.io/controller-runtime v0.17.3 +) + +require ( + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + go.uber.org/multierr v1.11.0 // indirect + golang.org/x/net v0.23.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/text v0.14.0 // indirect + golang.org/x/tools v0.16.1 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/apimachinery v0.29.2 // indirect + k8s.io/klog/v2 v2.110.1 // indirect + k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..bfdcc66 --- /dev/null +++ b/go.sum @@ -0,0 +1,122 @@ +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= +github.com/google/pprof 
v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/onsi/ginkgo/v2 v2.14.0 h1:vSmGj2Z5YPb9JwCWT6z6ihcUvDhuXLc3sJiqd3jMKAY= +github.com/onsi/ginkgo/v2 v2.14.0/go.mod h1:JkUdW7JkN0V6rFvsHcJ478egV3XH9NxpD27Hal/PhZw= +github.com/onsi/gomega v1.30.0 h1:hvMK7xYz4D3HapigLTeGdId/NcfQx1VHMJc60ew99+8= +github.com/onsi/gomega v1.30.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.16.1 h1:TLyB3WofjdOEepBHAU20JdNC1Zbg87elYofWYAY5oZA= +golang.org/x/tools v0.16.1/go.mod 
h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= +google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.29.2 h1:hBC7B9+MU+ptchxEqTNW2DkUosJpp1P+Wn6YncZ474A= +k8s.io/api v0.29.2/go.mod h1:sdIaaKuU7P44aoyyLlikSLayT6Vb7bvJNCX105xZXY0= +k8s.io/apimachinery v0.29.2 h1:EWGpfJ856oj11C52NRCHuU7rFDwxev48z+6DSlGNsV8= +k8s.io/apimachinery v0.29.2/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= +k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= +k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +k8s.io/utils 
v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9oYYYk= +sigs.k8s.io/controller-runtime v0.17.3/go.mod h1:N0jpP5Lo7lMTF9aL56Z/B2oWBJjey6StQM0jRbKQXtY= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/pkg/blockrsync/block_reader.go b/pkg/blockrsync/block_reader.go new file mode 100644 index 0000000..f4c60fb --- /dev/null +++ b/pkg/blockrsync/block_reader.go @@ -0,0 +1,79 @@ +package blockrsync + +import ( + "encoding/binary" + "io" + + "github.com/go-logr/logr" +) + +const ( + Hole byte = iota + Block +) + +type BlockReader struct { + source io.Reader + buf []byte + offset int64 + offsetType byte + log logr.Logger +} + +func NewBlockReader(source io.Reader, blockSize int, log logr.Logger) *BlockReader { + return &BlockReader{ + source: source, + buf: make([]byte, blockSize), + log: log, + } +} + +func (b *BlockReader) Next() (bool, error) { + var offset int64 + if err := binary.Read(b.source, binary.LittleEndian, &offset); err != nil { + b.log.V(5).Info("Failed to read offset", "error", err) + return handleReadError(err, nocallback) + } + b.offset = offset + + offsetType := make([]byte, 1) + if n, err := b.source.Read(offsetType); err != nil || n != 1 { + b.log.V(5).Info("Failed to read offset type", "error", err, "bytes", n) + return 
handleReadError(err, nocallback) + } + b.offsetType = offsetType[0] + if !b.IsHole() { + if n, err := io.ReadFull(b.source, b.buf[:cap(b.buf)]); err != nil { + b.log.V(5).Info("Failed to read complete block", "error", err, "bytes", n) + return handleReadError(err, func() { + b.buf = b.buf[:n] + }) + } + } + return true, nil +} + +func (b *BlockReader) Offset() int64 { + return b.offset +} + +func (b *BlockReader) IsHole() bool { + return b.offsetType == Hole +} + +func (b *BlockReader) Block() []byte { + return b.buf +} + +func handleReadError(err error, callback func()) (bool, error) { + if err == io.EOF || err == io.ErrUnexpectedEOF { + callback() + return false, nil + } else { + return false, err + } +} + +func nocallback() { + // No call to callback +} diff --git a/pkg/blockrsync/block_reader_test.go b/pkg/blockrsync/block_reader_test.go new file mode 100644 index 0000000..c4780e4 --- /dev/null +++ b/pkg/blockrsync/block_reader_test.go @@ -0,0 +1,116 @@ +package blockrsync + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +const ( + blockReader = "block reader" +) + +var _ = Describe(blockReader, func() { + It("should read from a reader", func() { + r := createBytesReader(4) + br := NewBlockReader(r, 4, GinkgoLogr.WithName(blockReader)) + Expect(br).ToNot(BeNil()) + cont, err := br.Next() + Expect(err).ToNot(HaveOccurred()) + Expect(cont).To(BeTrue()) + Expect(br.Block()).To(HaveLen(4)) + for i := 0; i < 4; i++ { + Expect(br.Block()[i]).To(Equal(byte(i)), "%v", br.Block()) + } + Expect(br.Offset()).To(Equal(int64(4096))) + }) + + It("should handle errors", func() { + cont, err := handleReadError(errors.New("Random error"), nocallback) + Expect(err).To(HaveOccurred()) + Expect(cont).To(BeFalse()) + cont, err = handleReadError(io.EOF, nocallback) + Expect(err).ToNot(HaveOccurred()) + Expect(cont).To(BeFalse()) + cont, err = handleReadError(io.ErrUnexpectedEOF, nocallback) + Expect(err).ToNot(HaveOccurred()) + Expect(cont).To(BeFalse()) + }) + + It("should handle not receiving offset data", func() { + b := []byte{} + buf := bytes.NewBuffer(b) + buf.Write([]byte{1}) + br := NewBlockReader(buf, 4, GinkgoLogr.WithName(blockReader)) + Expect(br).ToNot(BeNil()) + cont, err := br.Next() + Expect(err).ToNot(HaveOccurred()) + Expect(cont).To(BeFalse()) + Expect(br.Offset()).To(Equal(int64(0))) + }) + + It("should handle not receiving offset type data", func() { + b := []byte{} + buf := bytes.NewBuffer(b) + err := binary.Write(buf, binary.LittleEndian, int64(4096)) + Expect(err).ToNot(HaveOccurred()) + br := NewBlockReader(buf, 4, GinkgoLogr.WithName(blockReader)) + Expect(br).ToNot(BeNil()) + cont, err := br.Next() + Expect(err).ToNot(HaveOccurred()) + Expect(cont).To(BeFalse()) + Expect(br.Offset()).To(Equal(int64(4096))) + }) + + It("should skip reading data if receiving a hole", func() { + b := []byte{} + buf := bytes.NewBuffer(b) + err := binary.Write(buf, binary.LittleEndian, int64(4096)) + Expect(err).ToNot(HaveOccurred()) + buf.Write([]byte{Hole}) + br := 
NewBlockReader(buf, 4, GinkgoLogr.WithName(blockReader)) + Expect(br).ToNot(BeNil()) + cont, err := br.Next() + Expect(err).ToNot(HaveOccurred()) + Expect(cont).To(BeTrue()) + Expect(br.Offset()).To(Equal(int64(4096))) + }) + + It("should handle not getting complete block data", func() { + b := []byte{} + buf := bytes.NewBuffer(b) + err := binary.Write(buf, binary.LittleEndian, int64(4096)) + Expect(err).ToNot(HaveOccurred()) + buf.Write([]byte{Block}) + buf.Write([]byte{255}) + br := NewBlockReader(buf, 4, GinkgoLogr.WithName(blockReader)) + Expect(br).ToNot(BeNil()) + cont, err := br.Next() + Expect(err).ToNot(HaveOccurred()) + Expect(cont).To(BeFalse()) + Expect(br.Offset()).To(Equal(int64(4096))) + Expect(br.Block()).To(HaveLen(1)) + Expect(br.Block()[0]).To(Equal(byte(255)), "%v", br.Block()) + }) +}) + +func createBytesReader(blockSize int) io.Reader { + if blockSize > 255 { + Fail("block size must be less than 256") + } + b := []byte{} + buf := bytes.NewBuffer(b) + err := binary.Write(buf, binary.LittleEndian, int64(4096)) + Expect(err).ToNot(HaveOccurred()) + buf.Write([]byte{1}) + for i := 0; i < blockSize; i++ { + buf.Write([]byte{byte(i)}) + } + fmt.Fprintf(GinkgoWriter, "buf: %v\n", buf.Bytes()) + return buf +} diff --git a/pkg/blockrsync/blockrsync_suite_test.go b/pkg/blockrsync/blockrsync_suite_test.go new file mode 100644 index 0000000..8bc577e --- /dev/null +++ b/pkg/blockrsync/blockrsync_suite_test.go @@ -0,0 +1,13 @@ +package blockrsync + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestClient(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "blockrsync client Suite") +} diff --git a/pkg/blockrsync/client.go b/pkg/blockrsync/client.go new file mode 100644 index 0000000..dde6ab0 --- /dev/null +++ b/pkg/blockrsync/client.go @@ -0,0 +1,185 @@ +package blockrsync + +import ( + "encoding/binary" + "fmt" + "io" + "net" + "os" + "slices" + "time" + + "github.com/go-logr/logr" + "github.com/golang/snappy" +) + +type BlockrsyncClient struct { + sourceFile string + hasher Hasher + sourceSize int64 + opts *BlockRsyncOptions + log logr.Logger + connectionProvider ConnectionProvider +} + +func NewBlockrsyncClient(sourceFile, targetAddress string, port int, opts *BlockRsyncOptions, logger logr.Logger) *BlockrsyncClient { + return &BlockrsyncClient{ + sourceFile: sourceFile, + hasher: NewFileHasher(int64(opts.BlockSize), logger.WithName("hasher")), + opts: opts, + log: logger, + connectionProvider: &NetworkConnectionProvider{ + targetAddress: targetAddress, + port: port, + }, + } +} + +func (b *BlockrsyncClient) ConnectToTarget() error { + f, err := os.Open(b.sourceFile) + if err != nil { + return err + } + b.log.Info("Opened file", "file", b.sourceFile) + defer f.Close() + + size, err := b.hasher.HashFile(b.sourceFile) + if err != nil { + return err + } + b.sourceSize = size + b.log.V(5).Info("Hashed file", "filename", b.sourceFile, "size", size) + conn, err := b.connectionProvider.Connect() + if err != nil { + return err + } + defer conn.Close() + reader := snappy.NewReader(conn) + var diff []int64 + if blockSize, sourceHashes, err := b.hasher.DeserializeHashes(reader); err != nil { + return err + } else { + diff, err = b.hasher.DiffHashes(blockSize, sourceHashes) + if err != nil { + return err + } + if len(diff) == 0 { + b.log.Info("No differences found") + return nil + } else { + b.log.Info("Differences found", "count", len(diff)) + } + } + writer := snappy.NewBufferedWriter(conn) + defer writer.Close() + + 
syncProgress := &progress{ + progressType: "sync progress", + logger: b.log, + start: float64(50), + } + if err := b.writeBlocksToServer(writer, diff, f, syncProgress); err != nil { + return err + } + + return nil +} + +func (b *BlockrsyncClient) writeBlocksToServer(writer io.Writer, offsets []int64, f io.ReaderAt, syncProgress Progress) error { + b.log.V(3).Info("Writing blocks to server") + t := time.Now() + defer func() { + b.log.V(3).Info("Writing blocks took", "milliseconds", time.Since(t).Milliseconds()) + }() + + b.log.V(5).Info("Sending size of source file") + if err := binary.Write(writer, binary.LittleEndian, b.sourceSize); err != nil { + return err + } + b.log.V(5).Info("Sorting offsets") + // Sort diff + slices.SortFunc(offsets, int64SortFunc) + b.log.V(5).Info("offsets", "values", offsets) + if syncProgress != nil { + syncProgress.Start(int64(len(offsets)) * b.hasher.BlockSize()) + } + buf := make([]byte, b.hasher.BlockSize()) + for i, offset := range offsets { + b.log.V(5).Info("Sending data", "offset", offset, "index", i, "blocksize", b.hasher.BlockSize()) + if err := binary.Write(writer, binary.LittleEndian, offset); err != nil { + return err + } + n, err := f.ReadAt(buf, offset) + if err != nil && err != io.EOF { + return err + } + if isEmptyBlock(buf) { + b.log.V(5).Info("Skipping empty block", "offset", offset) + if _, err := writer.Write([]byte{Hole}); err != nil { + return err + } + } else { + _, err := writer.Write([]byte{Block}) + if err != nil { + return err + } + if int64(n) != b.hasher.BlockSize() { + b.log.V(5).Info("read last bytes", "count", n) + } + buf = buf[:n] + b.log.V(5).Info("Writing bytes", "count", len(buf)) + _, err = writer.Write(buf) + if err != nil { + return err + } + } + if syncProgress != nil { + syncProgress.Update(int64(i) * b.hasher.BlockSize()) + } + } + return nil +} + +func isEmptyBlock(buf []byte) bool { + for _, b := range buf { + if b != 0 { + return false + } + } + return true +} + +func int64SortFunc(i, j 
int64) int { + if j > i { + return -1 + } else if j < i { + return 1 + } + return 0 +} + +type ConnectionProvider interface { + Connect() (io.ReadWriteCloser, error) +} + +type NetworkConnectionProvider struct { + targetAddress string + port int +} + +func (n *NetworkConnectionProvider) Connect() (io.ReadWriteCloser, error) { + retryCount := 0 + var conn io.ReadWriteCloser + var err error + for conn == nil { + conn, err = net.Dial("tcp", fmt.Sprintf("%s:%d", n.targetAddress, n.port)) + if err != nil { + if retryCount > 30 { + return nil, fmt.Errorf("unable to connect to target after %d retries", retryCount) + } + time.Sleep(time.Second) + retryCount++ + } + } + return conn, nil +} diff --git a/pkg/blockrsync/client_test.go b/pkg/blockrsync/client_test.go new file mode 100644 index 0000000..b5f1d80 --- /dev/null +++ b/pkg/blockrsync/client_test.go @@ -0,0 +1,259 @@ +package blockrsync + +import ( + "bytes" + "crypto/md5" + "encoding/binary" + "encoding/hex" + "errors" + "io" + "net" + "os" + "path/filepath" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +const ( + testFileNameEmpty = "empty.raw" + testMD5 = "ba3cd24377dde5dfdd58728894004abb" +) + +var _ = Describe("blockrsync client tests", func() { + var ( + client *BlockrsyncClient + buf *bytes.Buffer + file *bytes.Reader + ) + BeforeEach(func() { + opts := BlockRsyncOptions{ + BlockSize: 2, + Preallocation: false, + } + client = NewBlockrsyncClient(filepath.Join(testImagePath, testFileName), "localhost", 8080, &opts, GinkgoLogr.WithName("client")) + client.sourceSize = 40 + buf = bytes.NewBuffer([]byte{}) + file = bytes.NewReader([]byte{1, 2, 0, 0, 3, 4}) + }) + + It("writeBlocksToServer should write a hole to the writer", func() { + testOffsets := []int64{2} + By("writing the blocks to the server") + err := client.writeBlocksToServer(buf, testOffsets, file, &TestProgress{ + expectedStart: 2, + expectedUpdate: 0, + }) + Expect(err).ToNot(HaveOccurred()) + + var sourceSize int64 + err = binary.Read(buf, binary.LittleEndian, &sourceSize) + Expect(err).ToNot(HaveOccurred()) + Expect(sourceSize).To(Equal(int64(40))) + + var offset int64 + err = binary.Read(buf, binary.LittleEndian, &offset) + Expect(err).ToNot(HaveOccurred()) + Expect(offset).To(Equal(int64(2))) + + offsetType := make([]byte, 1) + n, err := buf.Read(offsetType) + Expect(err).ToNot(HaveOccurred()) + Expect(n).To(Equal(1)) + Expect(offsetType[0]).To(Equal(Hole)) + + _, err = buf.Read(make([]byte, 2)) + Expect(err).To(HaveOccurred()) + }) + + It("writeBlocksToServer should write a block to the writer", func() { + testOffsets := []int64{4} + By("writing the blocks to the server") + err := client.writeBlocksToServer(buf, testOffsets, file, nil) + Expect(err).ToNot(HaveOccurred()) + + var sourceSize int64 + err = binary.Read(buf, binary.LittleEndian, &sourceSize) + Expect(err).ToNot(HaveOccurred()) + Expect(sourceSize).To(Equal(int64(40))) + + var offset int64 + err = binary.Read(buf, binary.LittleEndian, &offset) + Expect(err).ToNot(HaveOccurred()) + 
Expect(offset).To(Equal(int64(4))) + + offsetType := make([]byte, 1) + n, err := buf.Read(offsetType) + Expect(err).ToNot(HaveOccurred()) + Expect(n).To(Equal(1)) + Expect(offsetType[0]).To(Equal(Block)) + + res := make([]byte, 2) + n, err = buf.Read(res) + Expect(err).ToNot(HaveOccurred()) + Expect(n).To(Equal(2)) + Expect(res).To(Equal([]byte{3, 4})) + _, err = buf.Read(make([]byte, 2)) + Expect(err).To(HaveOccurred()) + }) + + It("should handle first error properly", func() { + testOffsets := []int64{4} + By("writing the blocks to the server") + err := client.writeBlocksToServer(&ErrorWriter{ + buf: bytes.NewBuffer([]byte{}), + writeUntilErrorCount: 0, + currentCount: 0, + }, testOffsets, file, nil) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("error")) + }) + + It("should handle second error properly", func() { + testOffsets := []int64{4} + By("writing the blocks to the server") + err := client.writeBlocksToServer(&ErrorWriter{ + buf: bytes.NewBuffer([]byte{}), + writeUntilErrorCount: 1, + currentCount: 0, + }, testOffsets, file, &TestProgress{ + expectedStart: 2, + expectedUpdate: 2, + }) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("error")) + }) + + It("should handle error writing holes type properly", func() { + testOffsets := []int64{2} + By("writing the blocks to the server") + err := client.writeBlocksToServer(&ErrorWriter{ + buf: bytes.NewBuffer([]byte{}), + writeUntilErrorCount: 2, + currentCount: 0, + }, testOffsets, file, &TestProgress{ + expectedStart: 2, + expectedUpdate: 2, + }) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("error")) + }) + + It("should handle error writing block type properly", func() { + testOffsets := []int64{4} + By("writing the blocks to the server") + err := client.writeBlocksToServer(&ErrorWriter{ + buf: bytes.NewBuffer([]byte{}), + writeUntilErrorCount: 2, + currentCount: 0, + }, testOffsets, file, &TestProgress{ + expectedStart: 2, + expectedUpdate: 2, + }) + 
Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("error")) + }) + + It("should handle error writing block", func() { + testOffsets := []int64{4} + By("writing the blocks to the server") + err := client.writeBlocksToServer(&ErrorWriter{ + buf: bytes.NewBuffer([]byte{}), + writeUntilErrorCount: 3, + currentCount: 0, + }, testOffsets, file, &TestProgress{ + expectedStart: 2, + expectedUpdate: 2, + }) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal("error")) + }) + + Context("with server", func() { + It("should not detect differences between same files", func() { + opts := BlockRsyncOptions{ + BlockSize: 64 * 1024, + Preallocation: false, + } + port, err := getFreePort() + Expect(err).ToNot(HaveOccurred()) + client = NewBlockrsyncClient(filepath.Join(testImagePath, testFileName), "localhost", port, &opts, GinkgoLogr.WithName("client")) + server := NewBlockrsyncServer(filepath.Join(testImagePath, testFileName), port, &opts, GinkgoLogr.WithName("server")) + go func() { + defer GinkgoRecover() + err := server.StartServer() + Expect(err).ToNot(HaveOccurred()) + }() + err = client.ConnectToTarget() + Expect(err).ToNot(HaveOccurred()) + // Should not error, if trying to write it will error since no permissions. 
+ }) + + It("should detect differences between source and empty file", func() { + tmpDir, err := os.MkdirTemp("", "blockrsync") + Expect(err).ToNot(HaveOccurred()) + opts := BlockRsyncOptions{ + BlockSize: 64 * 1024, + Preallocation: false, + } + port, err := getFreePort() + Expect(err).ToNot(HaveOccurred()) + client = NewBlockrsyncClient(filepath.Join(testImagePath, testFileName), "localhost", port, &opts, GinkgoLogr.WithName("client")) + server := NewBlockrsyncServer(filepath.Join(tmpDir, testFileNameEmpty), port, &opts, GinkgoLogr.WithName("server")) + go func() { + defer GinkgoRecover() + err := server.StartServer() + Expect(err).ToNot(HaveOccurred()) + }() + err = client.ConnectToTarget() + Expect(err).ToNot(HaveOccurred()) + md5sum := md5.New() + testFile, err := os.Open(filepath.Join(testImagePath, testFileName)) + Expect(err).ToNot(HaveOccurred()) + defer testFile.Close() + _, err = io.Copy(md5sum, testFile) + Expect(err).ToNot(HaveOccurred()) + hash := md5sum.Sum(nil) + Expect(hex.EncodeToString(hash)).To(Equal(testMD5)) + }) + }) +}) + +type ErrorWriter struct { + buf *bytes.Buffer + writeUntilErrorCount int + currentCount int +} + +func (e *ErrorWriter) Write(p []byte) (n int, err error) { + if e.currentCount == e.writeUntilErrorCount { + return 0, errors.New("error") + } + e.currentCount++ + return e.buf.Write(p) +} + +type TestProgress struct { + expectedStart int64 + expectedUpdate int64 +} + +func (p *TestProgress) Start(size int64) { + Expect(size).To(Equal(p.expectedStart)) +} + +func (p *TestProgress) Update(pos int64) { + Expect(pos).To(Equal(p.expectedUpdate)) +} + +func getFreePort() (port int, err error) { + var a *net.TCPAddr + if a, err = net.ResolveTCPAddr("tcp", "localhost:0"); err == nil { + var l *net.TCPListener + if l, err = net.ListenTCP("tcp", a); err == nil { + defer l.Close() + return l.Addr().(*net.TCPAddr).Port, nil + } + } + return +} diff --git a/pkg/blockrsync/hasher.go b/pkg/blockrsync/hasher.go new file mode 100644 index 
0000000..41eef44 --- /dev/null +++ b/pkg/blockrsync/hasher.go @@ -0,0 +1,280 @@ +package blockrsync + +import ( + "bytes" + "encoding/base64" + "encoding/binary" + "errors" + "fmt" + "hash" + "io" + "math" + "os" + "slices" + "sync" + "time" + + "github.com/go-logr/logr" + "golang.org/x/crypto/blake2b" +) + +const ( + DefaultBlockSize = int64(64 * 1024) + defaultConcurrency = 25 +) + +type Hasher interface { + HashFile(file string) (int64, error) + GetHashes() map[int64][]byte + DiffHashes(int64, map[int64][]byte) ([]int64, error) + SerializeHashes(io.Writer) error + DeserializeHashes(io.Reader) (int64, map[int64][]byte, error) + BlockSize() int64 +} + +type OffsetHash struct { + Offset int64 + Hash []byte +} + +type FileHasher struct { + hashes map[int64][]byte + queue chan int64 + res chan OffsetHash + blockSize int64 + fileSize int64 + log logr.Logger +} + +func NewFileHasher(blockSize int64, log logr.Logger) Hasher { + return &FileHasher{ + blockSize: blockSize, + queue: make(chan int64, defaultConcurrency), + res: make(chan OffsetHash, defaultConcurrency), + hashes: make(map[int64][]byte), + log: log, + } +} + +func (f *FileHasher) HashFile(fileName string) (int64, error) { + f.log.V(3).Info("Hashing file", "file", fileName) + t := time.Now() + defer func() { + f.log.V(3).Info("Hashing took", "milliseconds", time.Since(t).Milliseconds()) + }() + done := make(chan struct{}) + size, err := f.getFileSize(fileName) + if err != nil { + return 0, err + } + f.fileSize = size + go f.calculateOffsets(f.fileSize) + + count := f.concurrentHashCount(f.fileSize) + wg := sync.WaitGroup{} + go func() { + wg.Wait() + done <- struct{}{} + }() + + for i := 0; i < count; i++ { + wg.Add(1) + h, err := blake2b.New512(nil) + if err != nil { + return 0, err + } + go func(h hash.Hash) { + defer wg.Done() + osFile, err := os.Open(fileName) + if err != nil { + f.log.Info("Failed to open file", "error", err) + return + } + for offset := range f.queue { + h.Reset() + defer osFile.Close() 
+ if err := f.calculateHash(offset, osFile, h); err != nil { + f.log.Info("Failed to calculate hash", "offset", offset, "error", err) + return + } + } + }(h) + } + for { + select { + case offsetHash := <-f.res: + f.hashes[offsetHash.Offset] = offsetHash.Hash + case <-done: + return f.fileSize, nil + } + } +} + +func (f *FileHasher) getFileSize(fileName string) (int64, error) { + file, err := os.Open(fileName) + if err != nil { + return int64(0), err + } + defer file.Close() + size, err := file.Seek(0, io.SeekEnd) + if err != nil { + return int64(0), err + } + _, err = file.Seek(0, io.SeekStart) + if err != nil { + return int64(0), err + } + f.log.V(5).Info("Size", "bytes", size) + return size, nil +} + +func (f *FileHasher) concurrentHashCount(fileSize int64) int { + return int(math.Min(float64(defaultConcurrency), float64(int(fileSize/f.blockSize)))) +} + +func (f *FileHasher) calculateOffsets(size int64) { + var i int64 + defer close(f.queue) + f.log.V(5).Info("blocksize", "size", f.blockSize) + for i = 0; i < size; i += f.blockSize { + f.queue <- i + } +} + +func (f *FileHasher) calculateHash(offset int64, rs io.ReadSeeker, h hash.Hash) error { + _, err := rs.Seek(int64(offset), 0) + if err != nil { + f.log.V(5).Info("Failed to seek") + return err + } + buf := make([]byte, f.blockSize) + n, err := rs.Read(buf) + if err != nil { + f.log.V(5).Info("Failed to read") + return err + } + n, err = h.Write(buf[:n]) + if err != nil { + f.log.V(5).Info("Failed to write to hash") + return err + } + if n != len(buf) { + f.log.V(5).Info("Finished reading file") + } + offsetHash := OffsetHash{ + Offset: offset, + Hash: h.Sum(nil), + } + f.res <- offsetHash + return nil +} + +func (f *FileHasher) GetHashes() map[int64][]byte { + return f.hashes +} + +func (f *FileHasher) DiffHashes(blockSize int64, cmpHash map[int64][]byte) ([]int64, error) { + if blockSize != f.blockSize { + return nil, errors.New("block size mismatch") + } + var diff []int64 + f.log.V(5).Info("Size of hashes 
", "hash", len(f.hashes), "incoming hash", len(cmpHash)) + for k, v := range f.hashes { + if _, ok := cmpHash[k]; !ok { + // Hash not found in cmpHash + diff = append(diff, k) + } else { + if !bytes.Equal(v, cmpHash[k]) { + // Hashes don't match + diff = append(diff, k) + } + delete(cmpHash, k) + } + } + for k := range cmpHash { + // remaining hashes in cmpHash, if the offset is < size of source file + if k < f.fileSize { + diff = append(diff, k) + } + } + return diff, nil +} + +func (f *FileHasher) SerializeHashes(w io.Writer) error { + f.log.V(3).Info("Serializing hashes") + t := time.Now() + defer func() { + f.log.V(3).Info("Serializing took", "milliseconds", time.Since(t).Milliseconds()) + }() + + if err := binary.Write(w, binary.LittleEndian, int64(f.blockSize)); err != nil { + return err + } + length := len(f.hashes) + f.log.V(5).Info("Number of blocks", "size", length) + if err := binary.Write(w, binary.LittleEndian, int64(length)); err != nil { + return err + } + keys := make([]int64, 0, len(f.hashes)) + for k := range f.hashes { + keys = append(keys, k) + } + slices.SortFunc(keys, int64SortFunc) + for _, k := range keys { + f.log.V(5).Info("Writing offset", "offset", k) + if err := binary.Write(w, binary.LittleEndian, k); err != nil { + return err + } + if len(f.hashes[k]) != 64 { + return errors.New("invalid hash length") + } + if n, err := w.Write(f.hashes[k]); err != nil { + return err + } else { + f.log.V(5).Info("Wrote hash", "bytes", n) + } + } + f.log.V(5).Info("Finished writing hashes") + return nil +} + +func (f *FileHasher) DeserializeHashes(r io.Reader) (int64, map[int64][]byte, error) { + f.log.V(3).Info("Deserializing hashes") + t := time.Now() + defer func() { + f.log.V(3).Info("Deserializing took", "milliseconds", time.Since(t).Milliseconds()) + }() + var blockSize int64 + if err := binary.Read(r, binary.LittleEndian, &blockSize); err != nil { + return 0, nil, err + } + var length int64 + if err := binary.Read(r, binary.LittleEndian, 
&length); err != nil { + return 0, nil, err + } + f.log.V(3).Info("Number of blocks to receive", "size", length) + hashes := make(map[int64][]byte) + for i := int64(0); i < length; i++ { + var offset int64 + if err := binary.Read(r, binary.LittleEndian, &offset); err != nil { + return 0, nil, err + } + f.log.V(5).Info("Reading offset", "offset", offset) + if offset < 0 || offset > length*blockSize { + return 0, nil, fmt.Errorf("invalid offset %d", offset) + } + hash := make([]byte, 64) + if n, err := io.ReadFull(r, hash); err != nil { + return 0, nil, err + } else { + f.log.V(5).Info("Read hash", "bytes", n, "hash", base64.StdEncoding.EncodeToString(hash)) + } + hashes[offset] = hash + } + f.log.V(3).Info("Number of blocks actually received", "size", len(hashes)) + return blockSize, hashes, nil +} + +func (f *FileHasher) BlockSize() int64 { + return f.blockSize +} diff --git a/pkg/blockrsync/hasher_test.go b/pkg/blockrsync/hasher_test.go new file mode 100644 index 0000000..e6d85cc --- /dev/null +++ b/pkg/blockrsync/hasher_test.go @@ -0,0 +1,122 @@ +package blockrsync + +import ( + "bytes" + "io" + "path/filepath" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +const ( + testImagePath = "../../test_images" + testFileName = "cirros.raw" + testFileSize = 46137344 +) + +var _ = Describe("hasher tests", func() { + var ( + hasher Hasher + ) + + BeforeEach(func() { + hasher = NewFileHasher(DefaultBlockSize, GinkgoLogr.WithName("hasher")) + Expect(hasher).ToNot(BeNil()) + Expect(hasher.BlockSize()).To(Equal(DefaultBlockSize)) + }) + + It("should properly find the file size", func() { + fileSize, err := hasher.(*FileHasher).getFileSize(filepath.Join(testImagePath, testFileName)) + Expect(err).To(BeNil()) + Expect(fileSize).To(Equal(int64(testFileSize))) + }) + + It("should error on invalid file", func() { + size, err := hasher.(*FileHasher).getFileSize("invalid") + Expect(err).ToNot(BeNil()) + Expect(size).To(Equal(int64(0))) + }) + + DescribeTable("should determine concurrency based on file size to block size ratio", func(fileSize, blockSize int64, expectedConcurrency int) { + hasher = NewFileHasher(blockSize, GinkgoLogr.WithName("hasher")) + concurrency := hasher.(*FileHasher).concurrentHashCount(fileSize) + Expect(concurrency).To(Equal(expectedConcurrency)) + }, Entry("file size > 25 * block size", int64(testFileSize), int64(4096), defaultConcurrency), + Entry("file size = block size", int64(4096), int64(4096), 1), + Entry("file size < block size", int64(40960), int64(4096), 10), + ) + + It("should calculate the hashes of a file", func() { + n, err := hasher.HashFile(filepath.Join(testImagePath, testFileName)) + Expect(err).ToNot(HaveOccurred()) + Expect(n).To(Equal(int64(testFileSize))) + Expect(hasher.GetHashes()).To(HaveLen(int(testFileSize / DefaultBlockSize))) + }) + + It("should serialize and deserialize hashes", func() { + n, err := hasher.HashFile(filepath.Join(testImagePath, testFileName)) + Expect(err).ToNot(HaveOccurred()) + Expect(n).To(Equal(int64(testFileSize))) + var b bytes.Buffer + w := io.Writer(&b) + err = hasher.SerializeHashes(w) + Expect(err).ToNot(HaveOccurred()) + hashes 
:= hasher.GetHashes() + // 16 for the blocksize and length, 72 for each hash + Expect(b.Len()).To(Equal(72*len(hashes) + 16)) + r := io.Reader(&b) + blockSize, h, err := hasher.DeserializeHashes(r) + Expect(err).ToNot(HaveOccurred()) + Expect(blockSize).To(Equal(DefaultBlockSize)) + Expect(h).To(HaveLen(len(hashes))) + }) + + getCirrosHashes := func() map[int64][]byte { + cirrosHasher := NewFileHasher(DefaultBlockSize, GinkgoLogr.WithName("cirros hasher")) + n, err := cirrosHasher.HashFile(filepath.Join(testImagePath, testFileName)) + Expect(err).ToNot(HaveOccurred()) + Expect(n).To(Equal(int64(testFileSize))) + return cirrosHasher.GetHashes() + } + + getCirrosHashesModified := func() map[int64][]byte { + res := getCirrosHashes() + res[0] = []byte("modified") + return res + } + + getCirrosHashesEntryRemoved := func() map[int64][]byte { + res := getCirrosHashes() + delete(res, 0) + return res + } + + getLargerCirrosHashes := func() map[int64][]byte { + res := getCirrosHashes() + res[DefaultBlockSize*1000] = []byte("modified") + return res + } + + DescribeTable("It should properly determine differences between hashes", func(cmpHash map[int64][]byte, expected []int64) { + n, err := hasher.HashFile(filepath.Join(testImagePath, testFileName)) + Expect(err).ToNot(HaveOccurred()) + Expect(n).To(Equal(int64(testFileSize))) + diff, err := hasher.DiffHashes(DefaultBlockSize, cmpHash) + Expect(err).ToNot(HaveOccurred()) + Expect(diff).To(Equal(expected)) + }, + Entry("no differences", getCirrosHashes(), nil), + Entry("single differences", getCirrosHashesModified(), []int64{0}), + Entry("single differences, removed", getCirrosHashesEntryRemoved(), []int64{0}), + Entry("larger comparison, should strip", getLargerCirrosHashes(), nil), + ) + + It("should fail if block size is different", func() { + n, err := hasher.HashFile(filepath.Join(testImagePath, testFileName)) + Expect(err).ToNot(HaveOccurred()) + Expect(n).To(Equal(int64(testFileSize))) + _, err = 
hasher.DiffHashes(int64(4096), nil) + Expect(err).To(HaveOccurred()) + }) +}) diff --git a/pkg/blockrsync/progress.go b/pkg/blockrsync/progress.go new file mode 100644 index 0000000..435656d --- /dev/null +++ b/pkg/blockrsync/progress.go @@ -0,0 +1,37 @@ +package blockrsync + +import ( + "fmt" + "time" + + "github.com/go-logr/logr" +) + +type Progress interface { + Start(size int64) + Update(pos int64) +} + +type progress struct { + total int64 + current int64 + progressType string + lastUpdate time.Time + logger logr.Logger + start float64 +} + +func (p *progress) Start(size int64) { + p.total = size + p.current = int64(0) + p.lastUpdate = time.Now() + p.logger.Info(fmt.Sprintf("%s total size %d", p.progressType, p.total)) +} + +func (p *progress) Update(pos int64) { + p.current = pos + if time.Since(p.lastUpdate).Seconds() > time.Second.Seconds() || pos == p.total { + p.logger.Info(fmt.Sprintf("%s %.0f%%", p.progressType, (float64(p.current) / float64(p.total) * 100))) + p.lastUpdate = time.Now() + } +} diff --git a/pkg/blockrsync/progress_test.go b/pkg/blockrsync/progress_test.go new file mode 100644 index 0000000..6de2b49 --- /dev/null +++ b/pkg/blockrsync/progress_test.go @@ -0,0 +1,22 @@ +package blockrsync + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega"
+)
+
+var _ = Describe("progress tests", func() {
+	It("should properly update progress", func() {
+		p := progress{
+			logger: GinkgoLogr.WithName("progress"),
+		}
+		p.Start(100)
+		p.Update(50)
+		Expect(p.current).To(Equal(int64(50)))
+		time.Sleep(time.Second)
+		p.Update(100)
+		Expect(p.current).To(Equal(int64(100)))
+	})
+})
diff --git a/pkg/blockrsync/server.go b/pkg/blockrsync/server.go
new file mode 100644
index 0000000..9ae3e96
--- /dev/null
+++ b/pkg/blockrsync/server.go
@@ -0,0 +1,190 @@
+package blockrsync
+
+import (
+	"bufio"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"net"
+	"os"
+
+	"github.com/go-logr/logr"
+	"github.com/golang/snappy"
+)
+
+// BlockRsyncOptions holds tunables for applying a block diff.
+type BlockRsyncOptions struct {
+	Preallocation bool // write zero-filled blocks instead of punching holes
+	BlockSize     int  // hash/transfer block size in bytes
+}
+
+// BlockrsyncServer accepts a single client connection, sends the hashes
+// of the target file and applies the block diff it receives in return.
+type BlockrsyncServer struct {
+	targetFile     string
+	targetFileSize int64
+	port           int
+	hasher         Hasher
+	opts           *BlockRsyncOptions
+	log            logr.Logger
+}
+
+// NewBlockrsyncServer returns a server that syncs targetFile over port.
+func NewBlockrsyncServer(targetFile string, port int, opts *BlockRsyncOptions, logger logr.Logger) *BlockrsyncServer {
+	return &BlockrsyncServer{
+		targetFile: targetFile,
+		port:       port,
+		opts:       opts,
+		log:        logger,
+		hasher:     NewFileHasher(int64(opts.BlockSize), logger.WithName("hasher")),
+	}
+}
+
+// StartServer hashes the target file, waits for a single client on the
+// configured port, exchanges hashes and applies the received blocks.
+func (b *BlockrsyncServer) StartServer() error {
+	f, err := os.OpenFile(b.targetFile, os.O_RDWR|os.O_CREATE, 0666)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	// Hash the target file concurrently with waiting for the client.
+	hashResult := make(chan error, 1)
+	go func() {
+		size, err := b.hasher.HashFile(b.targetFile)
+		if err != nil {
+			b.log.Error(err, "Failed to hash file")
+			hashResult <- err
+			return
+		}
+		b.targetFileSize = size
+		b.log.Info("Hashed file with size", "filename", b.targetFile, "size", b.targetFileSize)
+		hashResult <- nil
+	}()
+
+	b.log.Info("Listening for tcp connection", "port", fmt.Sprintf(":%d", b.port))
+	listener, err := net.Listen("tcp", fmt.Sprintf(":%d", b.port))
+	if err != nil {
+		return err
+	}
+	defer listener.Close()
+	conn, err := listener.Accept()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+	writer := snappy.NewBufferedWriter(conn)
+	// Propagate a hashing failure instead of serving bogus hashes.
+	if err := <-hashResult; err != nil {
+		return err
+	}
+
+	if err := b.writeHashes(writer); err != nil {
+		return err
+	}
+	b.log.Info("Wrote hashes to client, starting diff reader")
+	reader := bufio.NewReader(snappy.NewReader(conn))
+	if err := b.writeBlocksToFile(f, reader); err != nil {
+		return err
+	}
+	return f.Sync()
+}
+
+// writeHashes streams the serialized hashes to the client and closes the
+// compressing writer so the client observes EOF.
+func (b *BlockrsyncServer) writeHashes(writer io.WriteCloser) error {
+	defer writer.Close()
+	if err := b.hasher.SerializeHashes(writer); err != nil {
+		return err
+	}
+	b.log.Info("Wrote hashes to client")
+	return nil
+}
+
+// writeBlocksToFile reads the source file size followed by the block
+// stream and applies each block (or hole) at its offset in f.
+func (b *BlockrsyncServer) writeBlocksToFile(f *os.File, reader io.Reader) error {
+	var sourceSize int64
+	if err := binary.Read(reader, binary.LittleEndian, &sourceSize); err != nil {
+		_, err = handleReadError(err, nocallback)
+		return err
+	}
+	if err := b.truncateFileIfNeeded(f, sourceSize, b.targetFileSize); err != nil {
+		_, err = handleReadError(err, nocallback)
+		return err
+	}
+
+	blockReader := NewBlockReader(reader, int(b.hasher.BlockSize()), b.log.WithName("block-reader"))
+	for {
+		cont, err := blockReader.Next()
+		if err != nil {
+			// A read error ends the stream; treat it as end of input,
+			// matching the client closing the connection when done.
+			return nil
+		}
+		if blockReader.IsHole() {
+			if err := b.handleEmptyBlock(blockReader.Offset(), f); err != nil {
+				return err
+			}
+		} else if err := b.writeBlockToOffset(blockReader.Block(), blockReader.Offset(), f); err != nil {
+			return err
+		}
+		if !cont {
+			return nil
+		}
+	}
+}
+
+// truncateFileIfNeeded shrinks the target to sourceSize when the target
+// is larger; block devices cannot shrink, so their tail is deallocated.
+func (b *BlockrsyncServer) truncateFileIfNeeded(f *os.File, sourceSize, targetSize int64) error {
+	info, err := f.Stat()
+	if err != nil {
+		return err
+	}
+	if targetSize <= sourceSize {
+		return nil
+	}
+	b.log.V(5).Info("Source size", "size", sourceSize)
+	if info.Mode()&(os.ModeDevice|os.ModeCharDevice) == 0 {
+		// Regular file: drop the tail the source no longer has.
+		b.log.V(5).Info("Source is smaller than target, truncating file")
+		return f.Truncate(sourceSize)
+	}
+	// Block device: deallocate the tail instead of truncating.
+	return PunchHole(f, sourceSize, targetSize-sourceSize)
+}
+
+// handleEmptyBlock materializes a hole at offset: zero-filled when
+// preallocation is requested, otherwise deallocated with PunchHole.
+func (b *BlockrsyncServer) handleEmptyBlock(offset int64, f *os.File) error {
+	b.log.V(5).Info("Skipping hole", "offset", offset)
+	size := b.hasher.BlockSize()
+	// Clamp to the end of the target so we never zero-fill past EOF.
+	if remaining := b.targetFileSize - offset; remaining > 0 && remaining < size {
+		size = remaining
+	}
+	if b.opts.Preallocation {
+		b.log.V(5).Info("Preallocating hole", "offset", offset)
+		if _, err := f.WriteAt(make([]byte, size), offset); err != nil {
+			return err
+		}
+		return nil
+	}
+	b.log.V(5).Info("Punching hole", "offset", offset, "size", size)
+	return PunchHole(f, offset, size)
+}
+
+// writeBlockToOffset writes block at offset in ws.
+func (b *BlockrsyncServer) writeBlockToOffset(block []byte, offset int64, ws io.WriteSeeker) error {
+	if _, err := ws.Seek(offset, io.SeekStart); err != nil {
+		return err
+	}
+	n, err := ws.Write(block)
+	if err != nil {
+		return err
+	}
+	b.log.V(5).Info("Wrote", "bytes", n)
+	return nil
+}
diff --git a/pkg/blockrsync/sparse.go b/pkg/blockrsync/sparse.go
new file mode 100644
index 0000000..2eac181
--- /dev/null
+++ b/pkg/blockrsync/sparse.go
@@ -0,0 +1,28 @@
+package blockrsync
+
+import (
+	"errors"
+	"os"
+	"syscall"
+)
+
+const (
+	FALLOC_FL_KEEP_SIZE  = 0x01 /* default is extend size */
+	FALLOC_FL_PUNCH_HOLE = 0x02 /* de-allocates range */
+)
+
+var (
+	// ErrPunchHoleNotSupported is returned when the underlying filesystem
+	// rejects FALLOC_FL_PUNCH_HOLE.
+	ErrPunchHoleNotSupported = errors.New("this filesystem does not support punching holes. Use xfs, ext4, btrfs or such")
+)
+
+// PunchHole deallocates size bytes at offset in f without changing the
+// file size (FALLOC_FL_KEEP_SIZE).
+func PunchHole(f *os.File, offset, size int64) error {
+	err := syscall.Fallocate(int(f.Fd()), FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, offset, size)
+	if err == syscall.ENOTSUP {
+		err = ErrPunchHoleNotSupported
+	}
+	return err
+}
diff --git a/pkg/proxy/client.go b/pkg/proxy/client.go
new file mode 100644
index 0000000..290f1ce
--- /dev/null
+++ b/pkg/proxy/client.go
@@ -0,0 +1,83 @@
+package proxy
+
+import (
+	"fmt"
+	"io"
+	"net"
+	"time"
+
+	"github.com/go-logr/logr"
+)
+
+// ProxyClient forwards a single local connection to the remote proxy
+// server, prefixing the stream with the sync identifier.
+type ProxyClient struct {
+	listenPort    int
+	targetPort    int
+	targetAddress string
+	log           logr.Logger
+}
+
+// NewProxyClient returns a client that listens on listenPort and forwards
+// to targetAddress:targetPort.
+func NewProxyClient(listenPort, targetPort int, targetAddress string, logger logr.Logger) *ProxyClient {
+	return &ProxyClient{
+		listenPort:    listenPort,
+		targetPort:    targetPort,
+		targetAddress: targetAddress,
+		log:           logger,
+	}
+}
+
+// ConnectToTarget accepts one local connection, dials the target (retrying
+// for up to 30 seconds), writes the identifier header and then shuttles
+// bytes in both directions until the local side closes.
+func (b *ProxyClient) ConnectToTarget(identifier string) error {
+	if len(identifier) != identifierLength {
+		return fmt.Errorf("identifier must be %d characters", identifierLength)
+	}
+	b.log.Info("Listening:", "host", "localhost", "port", b.listenPort)
+	listener, err := net.Listen("tcp", fmt.Sprintf("localhost:%d", b.listenPort))
+	if err != nil {
+		return err
+	}
+	defer listener.Close()
+
+	inConn, err := listener.Accept()
+	if err != nil {
+		return err
+	}
+	defer inConn.Close()
+
+	b.log.Info("Connecting to target", "address", b.targetAddress, "port", b.targetPort)
+	var outConn net.Conn
+	for retryCount := 0; ; retryCount++ {
+		outConn, err = net.Dial("tcp", fmt.Sprintf("%s:%d", b.targetAddress, b.targetPort))
+		if err == nil {
+			break
+		}
+		b.log.Error(err, "Unable to connect to target")
+		if retryCount >= 30 {
+			return fmt.Errorf("unable to connect to target after %d retries", retryCount)
+		}
+		time.Sleep(time.Second)
+	}
+	defer outConn.Close()
+
+	// Write the identifier header so the server can route this stream.
+	if _, err := outConn.Write([]byte(identifier)); err != nil {
+		return err
+	}
+
+	go func() {
+		n, _ := io.Copy(inConn, outConn)
+		b.log.Info("bytes copied from server to client", "count", n)
+	}()
+
+	n, err := io.Copy(outConn, inConn)
+	if err != nil {
+		return err
+	}
+	b.log.Info("bytes copied", "count", n)
+	return nil
+}
diff --git a/pkg/proxy/server.go b/pkg/proxy/server.go
new file mode 100644
index 0000000..4a86b59
--- /dev/null
+++ b/pkg/proxy/server.go
@@ -0,0 +1,194 @@
+package proxy
+
+import (
+	"fmt"
+	"io"
+	"net"
+	"os"
+	"os/exec"
+	"strconv"
+	"sync"
+	"time"
+
+	"github.com/go-logr/logr"
+)
+
+const (
+	identifierLength = 32 // Length of the md5sum
+	blockRsyncPort   = 3222
+)
+
+// ProxyServer accepts proxied client connections, routes each one by its
+// identifier header and spawns a blockrsync server per target file.
+type ProxyServer struct {
+	listenPort     int    // Port to listen on
+	blockrsyncPath string // Path to blockrsync binary
+	blockSize      int    // Block size to use
+	log            logr.Logger
+	identifiers    []string
+	wg             sync.WaitGroup
+}
+
+// NewProxyServer returns a ProxyServer serving the given identifiers.
+func NewProxyServer(blockrsyncPath string, blockSize, listenPort int, identifiers []string, logger logr.Logger) *ProxyServer {
+	return &ProxyServer{
+		listenPort:     listenPort,
+		blockrsyncPath: blockrsyncPath,
+		log:            logger,
+		identifiers:    identifiers,
+		blockSize:      blockSize,
+	}
+}
+
+// StartServer listens on listenPort and serves one sync per identifier,
+// returning once every identifier has completed.
+func (b *ProxyServer) StartServer() error {
+	for _, identifier := range b.identifiers {
+		if len(identifier) != identifierLength {
+			return fmt.Errorf("identifier must be %d characters", identifierLength)
+		}
+	}
+	b.log.Info("Listening:", "host", "localhost", "port", b.listenPort)
+	listener, err := net.Listen("tcp", fmt.Sprintf(":%d", b.listenPort))
+	if err != nil {
+		return err
+	}
+	defer listener.Close()
+	mu := &sync.Mutex{}
+	processingMap := make(map[string]int)
+
+	for i := 1; i <= len(b.identifiers); i++ {
+		b.wg.Add(1)
+		go b.processConnection(listener, processingMap, mu, i)
+	}
+	b.wg.Wait()
+	return nil
+}
+
+// processConnection accepts connections until one sync for a new
+// identifier completes; duplicate or failed connections are retried.
+func (b *ProxyServer) processConnection(listener net.Listener, processing map[string]int, mu *sync.Mutex, i int) {
+	defer b.wg.Done()
+	for {
+		b.log.Info("Waiting for connection")
+		conn, err := listener.Accept()
+		if err != nil {
+			b.log.Error(err, "Unable to accept connection")
+			continue
+		}
+		file, header, err := b.getTargetFileFromIdentifier(conn)
+		if err != nil {
+			b.log.Error(err, "Unable to get target file from identifier")
+			conn.Close()
+			continue
+		}
+		mu.Lock()
+		if processing[header] > 0 {
+			// Someone else is processing the same header; drop this connection.
+			b.log.Info("other thread is processing header", "thread", processing[header], "header", header)
+			mu.Unlock()
+			conn.Close()
+			continue
+		}
+		b.log.Info("processing header", "header", header, "thread", i)
+		processing[header] = i
+		mu.Unlock()
+
+		b.log.Info("Accepted connection, starting blockrsync server", "port", blockRsyncPort+i)
+		if err := b.startsBlockrsyncServer(conn, file, blockRsyncPort+i); err != nil {
+			b.log.Error(err, "Unable to start blockrsync server")
+			// Release the header so a reconnecting client can retry.
+			mu.Lock()
+			delete(processing, header)
+			mu.Unlock()
+			continue
+		}
+		return
+	}
+}
+
+// getTargetFileFromIdentifier reads the fixed-length identifier header
+// and resolves it to a target file path via the environment.
+func (b *ProxyServer) getTargetFileFromIdentifier(conn net.Conn) (string, string, error) {
+	header := make([]byte, identifierLength)
+	if _, err := io.ReadFull(conn, header); err != nil {
+		return "", "", err
+	}
+	file := os.Getenv(string(header))
+	if file == "" {
+		file = os.Getenv(fmt.Sprintf("id-%s", header))
+		if file == "" {
+			return "", "", fmt.Errorf("no filepath found for %s", string(header))
+		}
+	}
+	return file, string(header), nil
+}
+
+// startsBlockrsyncServer spawns a blockrsync target process for file on
+// port and pipes the proxied connection to and from it.
+func (b *ProxyServer) startsBlockrsyncServer(rw io.ReadWriteCloser, file string, port int) error {
+	defer rw.Close()
+
+	b.log.Info("writing to file", "file", file)
+	go b.forkProcess(file, port)
+
+	var blockRsyncConn net.Conn
+	var err error
+	for retries := 0; ; retries++ {
+		b.log.Info("Connecting to blockrsync server", "port", port)
+		blockRsyncConn, err = net.Dial("tcp", fmt.Sprintf("localhost:%d", port))
+		if err == nil {
+			b.log.Info("Connected to blockrsync server")
+			break
+		}
+		if retries >= 30 {
+			return fmt.Errorf("unable to connect to blockrsync server: %w", err)
+		}
+		b.log.Info("Waiting to connect to blockrsync server", "error", err)
+		time.Sleep(1 * time.Second)
+	}
+	defer blockRsyncConn.Close()
+	go func() {
+		// Local error variable: sharing err with the outer copy would race.
+		if _, err := io.Copy(rw, blockRsyncConn); err != nil {
+			b.log.Error(err, "Unable to copy data from server to client")
+		}
+	}()
+	b.log.Info("Copying data")
+	if _, err := io.Copy(blockRsyncConn, rw); err != nil {
+		b.log.Error(err, "Unable to copy data from client to server")
+		return err
+	}
+
+	b.log.Info("Successfully completed sync proxy")
+	return nil
+}
+
+// forkProcess runs the blockrsync binary as a target for file on port,
+// logging until the process exits.
+func (b *ProxyServer) forkProcess(file string, port int) {
+	arguments := []string{
+		file,
+		"--target",
+		"--port",
+		strconv.Itoa(port),
+		"--zap-log-level",
+		"3",
+		"--block-size",
+		strconv.Itoa(b.blockSize),
+	}
+
+	b.log.Info("Starting blockrsync server", "arguments", arguments)
+	cmd := exec.Command(b.blockrsyncPath, arguments...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+
+	if err := cmd.Start(); err != nil {
+		b.log.Error(err, "Unable to start blockrsync server")
+		return
+	}
+	// Wait for the command to finish.
+	if err := cmd.Wait(); err != nil {
+		b.log.Error(err, "Waiting for blockrsync server to complete")
+	}
+}
diff --git a/test_images/cirros.raw b/test_images/cirros.raw
new file mode 100644
index 0000000..ee15ec4
Binary files /dev/null and b/test_images/cirros.raw differ