diff --git a/exp/services/ledgerexporter/DEVELOPER_GUIDE.md b/exp/services/ledgerexporter/DEVELOPER_GUIDE.md new file mode 100644 index 0000000000..ef81553e37 --- /dev/null +++ b/exp/services/ledgerexporter/DEVELOPER_GUIDE.md @@ -0,0 +1,65 @@ + +# Ledger Exporter Developer Guide +The ledger exporter is a tool to export Stellar network transaction data to cloud storage in a way that is easy to access. + +## Prerequisites +This document assumes that you have installed and can run the ledger exporter, and that you have familiarity with its CLI and configuration. If not, please refer to the [Installation Guide](./README.md). + +## Goal +The goal of the ledger exporter is to build an easy-to-use tool to export Stellar network ledger data to a configurable remote data store, such as cloud blob storage. + - Use cloud storage optimally + - Minimize network usage to export + - Make it easy and fast to search for a specific ledger or ledger range + +## Architecture +To achieve its goals, the ledger exporter uses the following architecture, which consists of 3 main components: +- Captive-core to extract raw transaction metadata from the Stellar Network. +- Export manager to bundle and organize the ledgers to get them ready for export. +- The cloud storage plugin writes to the cloud storage. This is specific to the type of cloud storage, GCS in this case. + + +![ledgerexporter-architecture](./architecture.png) + + +## Data Format +- Ledger exporter uses a compact and efficient data format called [XDR](https://developers.stellar.org/docs/learn/encyclopedia/data-format/xdr) (External Data Representation), which is a compact binary format. A Stellar Captive Core instance emits data in this format and the data structure is referred to as `LedgerCloseMeta`. 
The exporter bundles multiple `LedgerCloseMeta`'s into a single object using a custom XDR structure called `LedgerCloseMetaBatch` which is defined in [Stellar-exporter.x](https://github.com/stellar/go/blob/master/xdr/Stellar-exporter.x). + +- The metadata for the same batch is also stored alongside each exported object. Supported metadata is defined in [metadata.go](https://github.com/stellar/go/blob/master/support/datastore/metadata.go). + +- Objects are compressed before uploading using the [zstd](http://facebook.github.io/zstd/) (zstandard) compression algorithm to optimize network usage and storage needs. + +## Data Storage +- An example implementation of `DataStore` is provided for GCS (Google Cloud Storage). This plugin is located in the [support](https://github.com/stellar/go/tree/master/support/datastore) package. +- The ledger exporter currently implements the interface only for Google Cloud Storage (GCS). The [GCS plugin](https://github.com/stellar/go/blob/master/support/datastore/gcs_datastore.go) uses GCS-specific behaviors like conditional puts, automatic retry, metadata, and CRC checksum. + +## Build, Run and Test using Docker +The Dockerfile contains all the necessary dependencies (e.g., Stellar-core) required to run the ledger exporter. + +- Build: To build the Docker container, use the provided [Makefile](./Makefile). Simply run `make docker-build` to build a new container after making any changes. + +- Run: For instructions on running the Docker container, refer to the [Installation Guide](./README.md). + +- Test: To test the Docker container, refer to the [docker-test](./Makefile) command for an example of how to use the [GCS emulator](https://github.com/fsouza/fake-gcs-server) for local testing. + +## Adding support for a new storage type +Support for different data storage types is encapsulated as 'plugins', which are implementations of the `DataStore` interface in a Go package. To add a data storage plugin based on a new storage type (e.g. 
AWS S3), follow these steps: + +- A data storage plugin must implement the [DataStore](https://github.com/stellar/go/blob/master/support/datastore/datastore.go) interface. +- Add support for new datastore-specific features. Implement any datastore-specific custom logic. Different datastores have different ways of handling + - race conditions + - automatic retries + - metadata storage, etc. +- Add the new datastore to the factory function [NewDataStore](https://github.com/stellar/go/blob/master/support/datastore/datastore.go). +- Add a [config](./config.example.toml) section for the new storage type. This may include configurations like destination, authentication information etc. +- An emulator such as a GCS emulator [fake-gcs-server](https://github.com/fsouza/fake-gcs-server) can be used for testing without connecting to real cloud storage. + +### Design DOs and DONTs +- Multiple exporters should be able to run in parallel without the need for explicit locking or synchronization. +- Exporters when restarted do not have any memory of prior operation and rely on the already exported data as much as possible to decide where to resume. + +## Using exported data +The exported data in storage can be used in the ETL pipeline to gather analytics and reporting. To write a tool that consumes exported data you can use Stellar ingestion library's `ledgerbackend` package. This package includes a ledger backend called [BufferedStorageBackend](https://github.com/stellar/go/blob/master/ingest/ledgerbackend/buffered_storage_backend.go), +which imports data from the storage and validates it. For more details, refer to the ledgerbackend [documentation](https://github.com/stellar/go/tree/master/ingest/ledgerbackend). + +## Contributing +For information on how to contribute, please refer to our [Contribution Guidelines](https://github.com/stellar/go/blob/master/CONTRIBUTING.md). 
diff --git a/exp/services/ledgerexporter/README.md b/exp/services/ledgerexporter/README.md index 57757508e1..008981b551 100644 --- a/exp/services/ledgerexporter/README.md +++ b/exp/services/ledgerexporter/README.md @@ -1,101 +1,130 @@ -# Ledger Exporter (Work in Progress) +## Ledger Exporter: Installation and Usage Guide -The Ledger Exporter is a tool designed to export ledger data from a Stellar network and upload it to a specified destination. It supports both bounded and unbounded modes, allowing users to export a specific range of ledgers or continuously export new ledgers as they arrive on the network. +This guide provides step-by-step instructions on installing and using the Ledger Exporter, a tool that exports Stellar network ledger data to a Google Cloud Storage (GCS) bucket for efficient analysis and storage. -Ledger Exporter currently uses captive-core as the ledger backend and GCS as the destination data store. +* [Prerequisites](#prerequisites) +* [Setup](#setup) + * [Set Up GCP Credentials](#set-up-gcp-credentials) + * [Create a GCS Bucket for Storage](#create-a-gcs-bucket-for-storage) +* [Running the Ledger Exporter](#running-the-ledger-exporter) + * [Pull the Docker Image](#1-pull-the-docker-image) + * [Configure the Exporter](#2-configure-the-exporter-configtoml) + * [Run the Exporter](#3-run-the-exporter) +* [Command Line Interface (CLI)](#command-line-interface-cli) + 1. [append: Continuously Export New Data](#1-append-continuously-export-new-data) + 2. [scan-and-fill: Fill Data Gaps](#2-scan-and-fill-fill-data-gaps) -# Exported Data Format -The tool allows for the export of multiple ledgers in a single exported file. The exported data is in XDR format and is compressed using zstd before being uploaded. 
+## Prerequisites -```go -type LedgerCloseMetaBatch struct { - StartSequence uint32 - EndSequence uint32 - LedgerCloseMetas []LedgerCloseMeta -} -``` +* **Google Cloud Platform (GCP) Account:** You will need a GCP account to create a GCS bucket for storing the exported data. +* **Docker:** Allows you to run the Ledger Exporter in a self-contained environment. The official Docker installation guide: [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/) + +## Setup + +### Set Up GCP Credentials + +Create application default credentials for your Google Cloud Platform (GCP) project by following these steps: +1. Download the [SDK](https://cloud.google.com/sdk/docs/install). +2. Install and initialize the [gcloud CLI](https://cloud.google.com/sdk/docs/initializing). +3. Create [application authentication credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc#google-idp) and store it in a secure location on your system, such as $HOME/.config/gcloud/application_default_credentials.json. + +For detailed instructions, refer to the [Providing Credentials for Application Default Credentials (ADC) guide.](https://cloud.google.com/docs/authentication/provide-credentials-adc) + +### Create a GCS Bucket for Storage -## Getting Started +1. Go to the GCP Console's Storage section ([https://console.cloud.google.com/storage](https://console.cloud.google.com/storage)) and create a new bucket. +2. Choose a descriptive name for the bucket, such as `stellar-ledger-data`. Refer to [Google Cloud Storage Bucket Naming Guideline](https://cloud.google.com/storage/docs/buckets#naming) for more information. +3. **Note down the bucket name** as you'll need it later in the configuration process. -### Installation (coming soon) -### Command Line Options +## Running the Ledger Exporter + +### 1. 
Pull the Docker Image + +Open a terminal window and download the Stellar Ledger Exporter Docker image using the following command: -#### Scan and Fill Mode: -Exports a specific range of ledgers, defined by --start and --end. Will only export to remote datastore if data is absent. ```bash -ledgerexporter scan-and-fill --start --end --config-file +docker pull stellar/ledger-exporter ``` -#### Append Mode: -Exports ledgers initially searching from --start, looking for the next absent ledger sequence number proceeding --start on the data store. If abscence is detected, the export range is narrowed to `--start `. -This feature requires ledgers to be present on the remote data store for some (possibly empty) prefix of the requested range and then absent for the (possibly empty) remainder. +### 2. Configure the Exporter (config.toml) +The Ledger Exporter relies on a configuration file (config.toml) to connect to your specific environment. This file defines details like: +- Your Google Cloud Storage (GCS) bucket where exported ledger data will be stored. +- Stellar network settings, such as the network you're using (testnet or pubnet). +- Datastore schema to control data organization. -In this mode, the --end ledger can be provided to stop the process once export has reached that ledger, or if absent or 0 it will result in continous exporting of new ledgers emitted from the network. +A sample configuration file [config.example.toml](config.example.toml) is provided. Copy and rename it to config.toml for customization. Edit the copied file (config.toml) to replace placeholders with your specific details. + +### 3. Run the Exporter + +The following command demonstrates how to run the Ledger Exporter: - It’s guaranteed that ledgers exported during `append` mode from `start` and up to the last logged ledger file `Uploaded {ledger file name}` were contiguous, meaning all ledgers within that range were exported to the data lake with no gaps or missing ledgers in between. 
```bash -ledgerexporter append --start --config-file ``` -### Configuration (toml): -The `stellar_core_config` supports two ways for configuring captive core: - use prebuilt captive core config toml, archive urls, and passphrase based on `stellar_core_config.network = testnet|pubnet`. - manually set the the captive core confg by supplying these core parameters which will override any defaults when `stellar_core_config.network` is present also: - `stellar_core_config.captive_core_toml_path` - `stellar_core_config.history_archive_urls` - `stellar_core_config.network_passphrase` +**Explanation:** -Ensure you have stellar-core installed and set `stellar_core_config.stellar_core_binary_path` to it's path on o/s. +* `--platform linux/amd64`: Specifies the platform architecture (adjust if needed for your system). +* `-v`: Mounts volumes to map your local GCP credentials and config.toml file to the container: + * `$HOME/.config/gcloud/application_default_credentials.json`: Your local GCP credentials file. + * `${PWD}/config.toml`: Your local configuration file. +* `-e GOOGLE_APPLICATION_CREDENTIALS=/.config/gcp/credentials.json`: Sets the environment variable for credentials within the container. +* `stellar/ledger-exporter`: The Docker image name. +* `<command>`: The Stellar Ledger Exporter command: [append](#1-append-continuously-export-new-data), [scan-and-fill](#2-scan-and-fill-fill-data-gaps) -Enable web service that will be bound to localhost post and publishes metrics by including `admin_port = {port}` +## Command Line Interface (CLI) -An example config, demonstrating preconfigured captive core settings and gcs data store config. 
-```toml -admin_port = 6061 +The Ledger Exporter offers two modes of operation for exporting ledger data: -[datastore_config] -type = "GCS" +### 1. append: Continuously Export New Data -[datastore_config.params] -destination_bucket_path = "your-bucket-name///" -[datastore_config.schema] -ledgers_per_file = 64 -files_per_partition = 10 +Exports ledgers initially searching from --start, looking for the next absent ledger sequence number following --start on the data store. If absence is detected, the export range is narrowed to `--start <absent_ledger>`. +This feature requires ledgers to be present on the remote data store for some (possibly empty) prefix of the requested range and then absent for the (possibly empty) remainder. -[stellar_core_config] - network = "testnet" - stellar_core_binary_path = "/my/path/to/stellar-core" - captive_core_toml_path = "my-captive-core.cfg" - history_archive_urls = ["http://testarchiveurl1", "http://testarchiveurl2"] - network_passphrase = "test" -``` +In this mode, the --end ledger can be provided to stop the process once export has reached that ledger, or if absent or 0 it will result in continuous exporting of new ledgers emitted from the network. -### Exported Files +It’s guaranteed that ledgers exported during `append` mode from `start` and up to the last logged ledger file `Uploaded {ledger file name}` were contiguous, meaning all ledgers within that range were exported to the data lake with no gaps or missing ledgers in between. -#### File Organization: -- Ledgers are grouped into files, with the number of ledgers per file set by `ledgers_per_file`. -- Files are further organized into partitions, with the number of files per partition set by `files_per_partition`. -### Filename Structure: -- Filenames indicate the ledger range they contain, e.g., `0-63.xdr.zstd` holds ledgers 0 to 63. -- Partition directories group files, e.g., `/0-639/` holds files for ledgers 0 to 639. 
+**Usage:** -#### Example: -with `ledgers_per_file = 64` and `files_per_partition = 10`: -- Partition names: `/0-639`, `/640-1279`, ... -- Filenames: `/0-639/0-63.xdr.zstd`, `/0-639/64-127.xdr.zstd`, ... +```bash +docker run --platform linux/amd64 -d \ + -v "$HOME/.config/gcloud/application_default_credentials.json":/.config/gcp/credentials.json:ro \ + -e GOOGLE_APPLICATION_CREDENTIALS=/.config/gcp/credentials.json \ + -v ${PWD}/config.toml:/config.toml \ + stellar/ledger-exporter \ + append --start <start_ledger> [--end <end_ledger>] [--config-file <config_file_path>] +``` + +Arguments: +- `--start <start_ledger>` (required): The starting ledger sequence number for the export process. +- `--end <end_ledger>` (optional): The ending ledger sequence number. If omitted or set to 0, the exporter will continuously export new ledgers as they appear on the network. +- `--config-file <config_file_path>` (optional): The path to your configuration file, containing details like GCS bucket information. If not provided, the exporter will look for config.toml in the directory where you run the command. + +### 2. scan-and-fill: Fill Data Gaps -#### Special Cases: +Scans the datastore (GCS bucket) for the specified ledger range and exports any missing ledgers to the datastore. This mode avoids unnecessary exports if the data is already present. The range is specified using the --start and --end options. -- If `ledgers_per_file` is set to 1, filenames will only contain the ledger number. -- If `files_per_partition` is set to 1, filenames will not contain the partition. +**Usage:** -#### Note: -- Avoid changing `ledgers_per_file` and `files_per_partition` after configuration for consistency. 
+```bash +docker run --platform linux/amd64 -d \ + -v "$HOME/.config/gcloud/application_default_credentials.json":/.config/gcp/credentials.json:ro \ + -e GOOGLE_APPLICATION_CREDENTIALS=/.config/gcp/credentials.json \ + -v ${PWD}/config.toml:/config.toml \ + stellar/ledger-exporter \ + scan-and-fill --start <start_ledger> --end <end_ledger> [--config-file <config_file_path>] +``` -#### Retrieving Data: -- To locate a specific ledger sequence, calculate the partition name and ledger file name using `files_per_partition` and `ledgers_per_file`. -- The `GetObjectKeyFromSequenceNumber` function automates this calculation. +Arguments: +- `--start <start_ledger>` (required): The starting ledger sequence number in the range to export. +- `--end <end_ledger>` (required): The ending ledger sequence number in the range. +- `--config-file <config_file_path>` (optional): The path to your configuration file, containing details like GCS bucket information. If not provided, the exporter will look for config.toml in the directory where you run the command. diff --git a/exp/services/ledgerexporter/architecture.png b/exp/services/ledgerexporter/architecture.png new file mode 100644 index 0000000000..85bd6d8b31 Binary files /dev/null and b/exp/services/ledgerexporter/architecture.png differ diff --git a/exp/services/ledgerexporter/config.example.toml b/exp/services/ledgerexporter/config.example.toml new file mode 100644 index 0000000000..db72169311 --- /dev/null +++ b/exp/services/ledgerexporter/config.example.toml @@ -0,0 +1,42 @@ + +# Sample TOML Configuration + +# Admin port configuration +# Specifies the port number for hosting the web service locally to publish metrics. +admin_port = 6061 + +# Datastore Configuration +[datastore_config] +# Specifies the type of datastore. Currently, only Google Cloud Storage (GCS) is supported. +type = "GCS" + +[datastore_config.params] +# The Google Cloud Storage bucket path for storing data, with optional subpaths for organization. 
+destination_bucket_path = "your-bucket-name/<optional>/<subpath>/" + +[datastore_config.schema] +# Configuration for data organization +ledgers_per_file = 64 # Number of ledgers stored in each file. +files_per_partition = 10 # Number of files per partition/directory. + +# Stellar-core Configuration +[stellar_core_config] +# Use default captive-core config based on network +# Options are "testnet" for the test network or "pubnet" for the public network. +network = "testnet" + +# Alternatively, you can manually configure captive-core parameters (overrides defaults if 'network' is set). + +# Path to the captive-core configuration file. +#captive_core_config_path = "my-captive-core.cfg" + +# URLs for Stellar history archives, with multiple URLs allowed. +#history_archive_urls = ["http://testarchiveurl1", "http://testarchiveurl2"] + +# Network passphrase for the Stellar network. +#network_passphrase = "Test SDF Network ; September 2015" + +# Path to stellar-core binary +# Not required when running in a Docker container as it has the stellar-core installed and path is set. +# When running outside of Docker, it will look for stellar-core in the OS path if it exists. 
+#stellar_core_binary_path = "/my/path/to/stellar-core diff --git a/exp/services/ledgerexporter/config.toml b/exp/services/ledgerexporter/config.toml deleted file mode 100644 index c41d9376ac..0000000000 --- a/exp/services/ledgerexporter/config.toml +++ /dev/null @@ -1,14 +0,0 @@ -[datastore_config] -type = "GCS" - -[datastore_config.params] -destination_bucket_path = "exporter-test/ledgers/testnet" - -[datastore_config.schema] -ledgers_per_file = 1 -files_per_partition = 64000 - -[stellar_core_config] - network = "testnet" - stellar_core_binary_path = "/usr/local/bin/stellar-core" - diff --git a/exp/services/ledgerexporter/internal/config.go b/exp/services/ledgerexporter/internal/config.go index d5aad53256..013a3ef8d7 100644 --- a/exp/services/ledgerexporter/internal/config.go +++ b/exp/services/ledgerexporter/internal/config.go @@ -124,8 +124,8 @@ func (config *Config) ValidateAndSetLedgerRange(ctx context.Context, archive his return errors.New("invalid end value, must be greater than start") } - latestNetworkLedger, err := datastore.GetLatestLedgerSequenceFromHistoryArchives(archive) - latestNetworkLedger = latestNetworkLedger + (datastore.GetHistoryArchivesCheckPointFrequency() * 2) + latestNetworkLedger, err := archive.GetLatestLedgerSequence() + latestNetworkLedger = latestNetworkLedger + (archive.GetCheckpointManager().GetCheckpointFrequency() * 2) if err != nil { return errors.Wrap(err, "Failed to retrieve the latest ledger sequence from history archives.") @@ -189,7 +189,7 @@ func (config *Config) GenerateCaptiveCoreConfig(coreBinFromPath string) (ledgerb BinaryPath: config.StellarCoreConfig.StellarCoreBinaryPath, NetworkPassphrase: params.NetworkPassphrase, HistoryArchiveURLs: params.HistoryArchiveURLs, - CheckpointFrequency: datastore.GetHistoryArchivesCheckPointFrequency(), + CheckpointFrequency: historyarchive.DefaultCheckpointFrequency, Log: logger.WithField("subservice", "stellar-core"), Toml: captiveCoreToml, UserAgent: "ledger-exporter", diff --git 
a/exp/services/ledgerexporter/internal/config_test.go b/exp/services/ledgerexporter/internal/config_test.go index f782de5ea4..d1c24cb198 100644 --- a/exp/services/ledgerexporter/internal/config_test.go +++ b/exp/services/ledgerexporter/internal/config_test.go @@ -5,19 +5,20 @@ import ( "fmt" "testing" + "github.com/stellar/go/historyarchive" "github.com/stellar/go/network" - "github.com/stellar/go/support/datastore" "github.com/stretchr/testify/require" - - "github.com/stellar/go/historyarchive" ) func TestNewConfig(t *testing.T) { ctx := context.Background() mockArchive := &historyarchive.MockArchive{} - mockArchive.On("GetRootHAS").Return(historyarchive.HistoryArchiveState{CurrentLedger: 5}, nil).Once() + mockArchive.On("GetLatestLedgerSequence").Return(uint32(5), nil).Once() + mockArchive.On("GetCheckpointManager"). + Return(historyarchive.NewCheckpointManager( + historyarchive.DefaultCheckpointFrequency)).Once() config, err := NewConfig( RuntimeSettings{StartLedger: 2, EndLedger: 3, ConfigFilePath: "test/test.toml", Mode: Append}, nil) @@ -198,7 +199,7 @@ func TestInvalidCaptiveCoreTomlPath(t *testing.T) { func TestValidateStartAndEndLedger(t *testing.T) { latestNetworkLedger := uint32(20000) - latestNetworkLedgerPadding := datastore.GetHistoryArchivesCheckPointFrequency() * 2 + latestNetworkLedgerPadding := historyarchive.DefaultCheckpointFrequency * 2 tests := []struct { name string @@ -282,7 +283,10 @@ func TestValidateStartAndEndLedger(t *testing.T) { ctx := context.Background() mockArchive := &historyarchive.MockArchive{} - mockArchive.On("GetRootHAS").Return(historyarchive.HistoryArchiveState{CurrentLedger: latestNetworkLedger}, nil) + mockArchive.On("GetLatestLedgerSequence").Return(latestNetworkLedger, nil) + mockArchive.On("GetCheckpointManager"). 
+ Return(historyarchive.NewCheckpointManager( + historyarchive.DefaultCheckpointFrequency)) mockedHasCtr := 0 for _, tt := range tests { @@ -302,7 +306,7 @@ func TestValidateStartAndEndLedger(t *testing.T) { } }) } - mockArchive.AssertNumberOfCalls(t, "GetRootHAS", mockedHasCtr) + mockArchive.AssertExpectations(t) } func TestAdjustedLedgerRangeBoundedMode(t *testing.T) { @@ -358,7 +362,10 @@ func TestAdjustedLedgerRangeBoundedMode(t *testing.T) { ctx := context.Background() mockArchive := &historyarchive.MockArchive{} - mockArchive.On("GetRootHAS").Return(historyarchive.HistoryArchiveState{CurrentLedger: 500}, nil).Times(len(tests)) + mockArchive.On("GetLatestLedgerSequence").Return(uint32(500), nil) + mockArchive.On("GetCheckpointManager"). + Return(historyarchive.NewCheckpointManager( + historyarchive.DefaultCheckpointFrequency)) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -421,7 +428,10 @@ func TestAdjustedLedgerRangeUnBoundedMode(t *testing.T) { ctx := context.Background() mockArchive := &historyarchive.MockArchive{} - mockArchive.On("GetRootHAS").Return(historyarchive.HistoryArchiveState{CurrentLedger: 500}, nil).Times(len(tests)) + mockArchive.On("GetLatestLedgerSequence").Return(uint32(500), nil) + mockArchive.On("GetCheckpointManager"). 
+ Return(historyarchive.NewCheckpointManager( + historyarchive.DefaultCheckpointFrequency)) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/historyarchive/archive.go b/historyarchive/archive.go index 4f9e14380f..d97471b42f 100644 --- a/historyarchive/archive.go +++ b/historyarchive/archive.go @@ -71,6 +71,7 @@ type ArchiveInterface interface { GetLedgerHeader(chk uint32) (xdr.LedgerHeaderHistoryEntry, error) GetRootHAS() (HistoryArchiveState, error) GetLedgers(start, end uint32) (map[uint32]*Ledger, error) + GetLatestLedgerSequence() (uint32, error) GetCheckpointHAS(chk uint32) (HistoryArchiveState, error) PutCheckpointHAS(chk uint32, has HistoryArchiveState, opts *CommandOptions) error PutRootHAS(has HistoryArchiveState, opts *CommandOptions) error @@ -176,6 +177,16 @@ func (a *Archive) PutPathHAS(path string, has HistoryArchiveState, opts *Command return a.backend.PutFile(path, io.NopCloser(bytes.NewReader(buf))) } +func (a *Archive) GetLatestLedgerSequence() (uint32, error) { + has, err := a.GetRootHAS() + if err != nil { + log.Error("Error getting root HAS from archive", err) + return 0, errors.Wrap(err, "failed to retrieve the latest ledger sequence from history archive") + } + + return has.CurrentLedger, nil +} + func (a *Archive) BucketExists(bucket Hash) (bool, error) { return a.cachedExists(BucketPath(bucket)) } diff --git a/historyarchive/archive_pool.go b/historyarchive/archive_pool.go index 48178ade26..28967d8aa6 100644 --- a/historyarchive/archive_pool.go +++ b/historyarchive/archive_pool.go @@ -204,6 +204,16 @@ func (pa *ArchivePool) GetCheckpointHAS(chk uint32) (HistoryArchiveState, error) }) } +func (pa *ArchivePool) GetLatestLedgerSequence() (uint32, error) { + has, err := pa.GetRootHAS() + if err != nil { + log.Error("Error getting root HAS from archive", err) + return 0, errors.Wrap(err, "failed to retrieve the latest ledger sequence from history archive") + } + + return has.CurrentLedger, nil +} + func (pa 
*ArchivePool) PutCheckpointHAS(chk uint32, has HistoryArchiveState, opts *CommandOptions) error { return pa.runRoundRobin(func(ai ArchiveInterface) error { return ai.PutCheckpointHAS(chk, has, opts) diff --git a/historyarchive/mocks.go b/historyarchive/mocks.go index fa5716e5de..efe333cd33 100644 --- a/historyarchive/mocks.go +++ b/historyarchive/mocks.go @@ -10,6 +10,11 @@ type MockArchive struct { mock.Mock } +func (m *MockArchive) GetLatestLedgerSequence() (uint32, error) { + a := m.Called() + return a.Get(0).(uint32), a.Error(1) +} + func (m *MockArchive) GetCheckpointManager() CheckpointManager { a := m.Called() return a.Get(0).(CheckpointManager) diff --git a/ingest/README.md b/ingest/README.md index cf3d38f8da..277807f978 100644 --- a/ingest/README.md +++ b/ingest/README.md @@ -67,15 +67,15 @@ func main() { _(The `panicIf` function is defined in the [footnotes](#footnotes); it's used here for error-checking brevity.)_ -Notice that the mysterious `config` variable above isn't defined. This will be environment-specific and users should consult both the [Captive Core documentation](../../services/horizon/internal/docs/captive_core.md) and the [config docs](./ledgerbackend/captive_core_backend.go#L96-L125) directly for more details if they want to use this backend in production. For now, though, we'll have some hardcoded values for the SDF testnet: +Notice that the mysterious `config` variable above isn't defined. This will be environment-specific and refer to the code [here](./ledgerbackend/captive_core_backend.go) for the complete list of configuration parameters. 
For now, we'll use the [default](../network/main.go) values defined for the SDF testnet: ```go -networkPassphrase := "Test SDF Network ; September 2015" +archiveURLs := network.TestNetworkhistoryArchiveURLs +networkPassphrase := network.TestNetworkPassphrase captiveCoreToml, err := ledgerbackend.NewCaptiveCoreToml( ledgerbackend.CaptiveCoreTomlParams{ NetworkPassphrase: networkPassphrase, - HistoryArchiveURLs: []string{ - "https://history.stellar.org/prd/core-testnet/core_testnet_001", + HistoryArchiveURLs: archiveURLs, }, }) panicIf(err) @@ -258,7 +258,7 @@ As of this writing, the stats are as follows: - total operations: 33845 - succeeded / failed: 25387 / 8458 -The full, runnable example is available [here](./example_statistics.go). +The full, runnable example is available [here](./tutorial/example_statistics.go). # **Example**: Feature Popularity @@ -392,4 +392,4 @@ func panicIf(err error) { 2. Since the Stellar testnet undergoes periodic resets, the example outputs from various sections (especially regarding network statistics) will not always be accurate. - 3. It's worth noting that even though the [second example](example-tracking-feature-popularity) could *also* be done by using the `LedgerTransactionReader` and inspecting the individual operations, that'd be bit redundant as far as examples go. + 3. It's worth noting that even though the [second example](#example-feature-popularity) could *also* be done by using the `LedgerTransactionReader` and inspecting the individual operations, that'd be bit redundant as far as examples go. diff --git a/ingest/doc.go b/ingest/doc.go index d7fa6ebc95..e4360b9acc 100644 --- a/ingest/doc.go +++ b/ingest/doc.go @@ -8,8 +8,7 @@ possible features. This is why this package was created. # Ledger Backend Ledger backends are sources of information about Stellar network ledgers. This -can be, for example: a Stellar-Core database, (possibly-remote) Captive -Stellar-Core instances, or History Archives. 
Please consult the "ledgerbackend" +can be, for example: Captive Stellar-Core instances. Please consult the "ledgerbackend" package docs for more information about each backend. Warning: Ledger backends provide low-level xdr.LedgerCloseMeta that should not diff --git a/ingest/tutorial/example_common.go b/ingest/tutorial/example_common.go index 133ac02de6..aaeb14d447 100644 --- a/ingest/tutorial/example_common.go +++ b/ingest/tutorial/example_common.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/stellar/go/ingest/ledgerbackend" + "github.com/stellar/go/network" ) var ( @@ -11,12 +12,8 @@ var ( ) func captiveCoreConfig() ledgerbackend.CaptiveCoreConfig { - archiveURLs := []string{ - "https://history.stellar.org/prd/core-testnet/core_testnet_001", - "https://history.stellar.org/prd/core-testnet/core_testnet_002", - "https://history.stellar.org/prd/core-testnet/core_testnet_003", - } - networkPassphrase := "Test SDF Network ; September 2015" + archiveURLs := network.TestNetworkhistoryArchiveURLs + networkPassphrase := network.TestNetworkPassphrase captiveCoreToml, err := ledgerbackend.NewCaptiveCoreToml(ledgerbackend.CaptiveCoreTomlParams{ NetworkPassphrase: networkPassphrase, HistoryArchiveURLs: archiveURLs, diff --git a/services/friendbot/main.go b/services/friendbot/main.go index 22a04b0bed..22e7d4c44d 100644 --- a/services/friendbot/main.go +++ b/services/friendbot/main.go @@ -8,6 +8,7 @@ import ( "github.com/go-chi/chi" "github.com/spf13/cobra" + "github.com/stellar/go/services/friendbot/internal" "github.com/stellar/go/support/app" "github.com/stellar/go/support/config" @@ -29,6 +30,7 @@ type Config struct { BaseFee int64 `toml:"base_fee" valid:"optional"` MinionBatchSize int `toml:"minion_batch_size" valid:"optional"` SubmitTxRetriesAllowed int `toml:"submit_tx_retries_allowed" valid:"optional"` + UseCloudflareIP bool `toml:"use_cloudflare_ip" valid:"optional"` } func main() { @@ -68,7 +70,7 @@ func run(cmd *cobra.Command, args []string) { log.Error(err) 
os.Exit(1) } - router := initRouter(fb) + router := initRouter(cfg, fb) registerProblems() addr := fmt.Sprintf("0.0.0.0:%d", cfg.Port) @@ -84,8 +86,8 @@ func run(cmd *cobra.Command, args []string) { }) } -func initRouter(fb *internal.Bot) *chi.Mux { - mux := http.NewAPIMux(log.DefaultLogger) +func initRouter(cfg Config, fb *internal.Bot) *chi.Mux { + mux := newMux(cfg) handler := &internal.FriendbotHandler{Friendbot: fb} mux.Get("/", handler.Handle) @@ -97,6 +99,15 @@ func initRouter(fb *internal.Bot) *chi.Mux { return mux } +func newMux(cfg Config) *chi.Mux { + mux := chi.NewRouter() + // first apply XFFMiddleware so we can have the real ip in the subsequent + // middlewares + mux.Use(http.XFFMiddleware(http.XFFMiddlewareConfig{BehindCloudflare: cfg.UseCloudflareIP})) + mux.Use(http.NewAPIMux(log.DefaultLogger).Middlewares()...) + return mux +} + func registerProblems() { problem.RegisterError(sql.ErrNoRows, problem.NotFound) diff --git a/services/friendbot/router_test.go b/services/friendbot/router_test.go new file mode 100644 index 0000000000..292a3253ca --- /dev/null +++ b/services/friendbot/router_test.go @@ -0,0 +1,33 @@ +package main + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stellar/go/support/log" +) + +func TestIPLogging(t *testing.T) { + done := log.DefaultLogger.StartTest(log.InfoLevel) + + mux := newMux(Config{UseCloudflareIP: true}) + mux.Get("/", func(w http.ResponseWriter, request *http.Request) { + w.WriteHeader(http.StatusOK) + }) + recorder := httptest.NewRecorder() + request := httptest.NewRequest("GET", "/", nil) + ipAddress := "255.128.255.128" + request.Header.Set("CF-Connecting-IP", ipAddress) + mux.ServeHTTP(recorder, request) + require.Equal(t, http.StatusOK, recorder.Code) + + logged := done() + require.Len(t, logged, 2) + require.Equal(t, "starting request", logged[0].Message) + require.Equal(t, ipAddress, logged[0].Data["ip"]) + require.Equal(t, "finished 
request", logged[1].Message) + require.Equal(t, ipAddress, logged[1].Data["ip"]) +} diff --git a/services/horizon/internal/ingest/fsm.go b/services/horizon/internal/ingest/fsm.go index 283cd3347a..c165faf371 100644 --- a/services/horizon/internal/ingest/fsm.go +++ b/services/horizon/internal/ingest/fsm.go @@ -537,9 +537,6 @@ func (r resumeState) run(s *system) (transition, error) { return retryResume(r), err } - duration = time.Since(startTime).Seconds() - s.Metrics().LedgerIngestionDuration.Observe(float64(duration)) - // Update stats metrics changeStatsMap := stats.changeStats.Map() r.addLedgerStatsMetricFromMap(s, "change", changeStatsMap) @@ -560,6 +557,13 @@ func (r resumeState) run(s *system) (transition, error) { // roll up and be reported here as part of resumeState transition addHistoryArchiveStatsMetrics(s, s.historyAdapter.GetStats()) + s.maybeVerifyState(ingestLedger, ledgerCloseMeta.BucketListHash()) + s.maybeReapHistory(ingestLedger) + s.maybeReapLookupTables(ingestLedger) + + duration = time.Since(startTime).Seconds() + s.Metrics().LedgerIngestionDuration.Observe(float64(duration)) + localLog := log.WithFields(logpkg.F{ "sequence": ingestLedger, "duration": duration, @@ -577,10 +581,6 @@ func (r resumeState) run(s *system) (transition, error) { localLog.Info("Processed ledger") - s.maybeVerifyState(ingestLedger, ledgerCloseMeta.BucketListHash()) - s.maybeReapHistory(ingestLedger) - s.maybeReapLookupTables(ingestLedger) - return resumeImmediately(ingestLedger), nil } diff --git a/support/datastore/history_archive.go b/support/datastore/history_archive.go deleted file mode 100644 index 9fd291bac7..0000000000 --- a/support/datastore/history_archive.go +++ /dev/null @@ -1,53 +0,0 @@ -package datastore - -import ( - "context" - - log "github.com/sirupsen/logrus" - - "github.com/stellar/go/historyarchive" - "github.com/stellar/go/network" - "github.com/stellar/go/support/errors" - supportlog "github.com/stellar/go/support/log" - 
"github.com/stellar/go/support/storage" -) - -const ( - Pubnet = "pubnet" - Testnet = "testnet" -) - -func CreateHistoryArchiveFromNetworkName(ctx context.Context, networkName string, userAgent string, logger *supportlog.Entry) (historyarchive.ArchiveInterface, error) { - var historyArchiveUrls []string - switch networkName { - case Pubnet: - historyArchiveUrls = network.PublicNetworkhistoryArchiveURLs - case Testnet: - historyArchiveUrls = network.TestNetworkhistoryArchiveURLs - default: - return nil, errors.Errorf("Invalid network name %s", networkName) - } - - return historyarchive.NewArchivePool(historyArchiveUrls, historyarchive.ArchiveOptions{ - Logger: logger, - ConnectOptions: storage.ConnectOptions{ - UserAgent: userAgent, - Context: ctx, - }, - }) -} - -func GetLatestLedgerSequenceFromHistoryArchives(archive historyarchive.ArchiveInterface) (uint32, error) { - has, err := archive.GetRootHAS() - if err != nil { - log.Error("Error getting root HAS from archives", err) - return 0, errors.Wrap(err, "failed to retrieve the latest ledger sequence from any history archive") - } - - return has.CurrentLedger, nil -} - -func GetHistoryArchivesCheckPointFrequency() uint32 { - // this could evolve to use other sources for checkpoint freq - return historyarchive.DefaultCheckpointFrequency -} diff --git a/support/datastore/resumablemanager_test.go b/support/datastore/resumablemanager_test.go index 4616f9e4ae..4fc8738b08 100644 --- a/support/datastore/resumablemanager_test.go +++ b/support/datastore/resumablemanager_test.go @@ -282,8 +282,12 @@ func TestResumability(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { mockArchive := &historyarchive.MockArchive{} - mockArchive.On("GetRootHAS").Return(historyarchive.HistoryArchiveState{CurrentLedger: tt.latestLedger}, tt.archiveError).Once() - + mockArchive.On("GetLatestLedgerSequence").Return(tt.latestLedger, tt.archiveError).Once() + if tt.archiveError == nil { + 
mockArchive.On("GetCheckpointManager"). + Return(historyarchive.NewCheckpointManager( + historyarchive.DefaultCheckpointFrequency)).Once() + } mockDataStore := &MockDataStore{} tt.registerMockCalls(mockDataStore) diff --git a/support/datastore/resumeablemanager.go b/support/datastore/resumeablemanager.go index 35031d73f6..7e6b03df99 100644 --- a/support/datastore/resumeablemanager.go +++ b/support/datastore/resumeablemanager.go @@ -62,12 +62,12 @@ func (rm resumableManagerService) FindStart(ctx context.Context, start, end uint networkLatest := uint32(0) if end < 1 { var latestErr error - networkLatest, latestErr = GetLatestLedgerSequenceFromHistoryArchives(rm.archive) + networkLatest, latestErr = rm.archive.GetLatestLedgerSequence() if latestErr != nil { err := errors.Wrap(latestErr, "Resumability of requested export ledger range, was not able to get latest ledger from network") return 0, false, err } - networkLatest = networkLatest + (GetHistoryArchivesCheckPointFrequency() * 2) + networkLatest = networkLatest + (rm.archive.GetCheckpointManager().GetCheckpointFrequency() * 2) log.Infof("Resumability computed effective latest network ledger including padding of checkpoint frequency to be %d", networkLatest) if start > networkLatest { diff --git a/support/http/logging_middleware.go b/support/http/logging_middleware.go index 2cc957ac68..540dbd1243 100644 --- a/support/http/logging_middleware.go +++ b/support/http/logging_middleware.go @@ -8,6 +8,7 @@ import ( "github.com/go-chi/chi" "github.com/go-chi/chi/middleware" + "github.com/stellar/go/support/http/mutil" "github.com/stellar/go/support/log" ) @@ -136,6 +137,7 @@ func logEndOfRequest( "subsys": "http", "path": r.URL.String(), "method": r.Method, + "ip": r.RemoteAddr, "status": mw.Status(), "bytes": mw.BytesWritten(), "duration": duration, diff --git a/support/http/logging_middleware_test.go b/support/http/logging_middleware_test.go index 0e2eb45bb2..3ba4d651db 100644 --- a/support/http/logging_middleware_test.go 
+++ b/support/http/logging_middleware_test.go @@ -6,9 +6,10 @@ import ( "github.com/go-chi/chi" "github.com/go-chi/chi/middleware" + "github.com/stretchr/testify/assert" + "github.com/stellar/go/support/http/httptest" "github.com/stellar/go/support/log" - "github.com/stretchr/testify/assert" ) // setXFFMiddleware sets "X-Forwarded-For" header to test LoggingMiddlewareWithOptions. @@ -143,7 +144,7 @@ func TestHTTPMiddlewareWithOptions(t *testing.T) { assert.Equal(t, req1, logged[2].Data["req"]) assert.Equal(t, "/path/1234", logged[2].Data["path"]) assert.Equal(t, "/path/{value}", logged[2].Data["route"]) - assert.Equal(t, 9, len(logged[2].Data)) + assert.Equal(t, 10, len(logged[2].Data)) assert.Equal(t, "starting request", logged[3].Message) assert.Equal(t, "http", logged[3].Data["subsys"]) @@ -162,7 +163,7 @@ func TestHTTPMiddlewareWithOptions(t *testing.T) { assert.Equal(t, req2, logged[4].Data["req"]) assert.Equal(t, "/not_found", logged[4].Data["path"]) assert.Equal(t, "/not_found", logged[4].Data["route"]) - assert.Equal(t, 9, len(logged[4].Data)) + assert.Equal(t, 10, len(logged[4].Data)) assert.Equal(t, "starting request", logged[5].Message) assert.Equal(t, "http", logged[5].Data["subsys"]) @@ -181,7 +182,7 @@ func TestHTTPMiddlewareWithOptions(t *testing.T) { assert.Equal(t, req3, logged[6].Data["req"]) assert.Equal(t, "/really_not_found", logged[6].Data["path"]) assert.Equal(t, "", logged[6].Data["route"]) - assert.Equal(t, 9, len(logged[6].Data)) + assert.Equal(t, 10, len(logged[6].Data)) } } diff --git a/support/http/mux.go b/support/http/mux.go index ca041d3797..1d64f99812 100644 --- a/support/http/mux.go +++ b/support/http/mux.go @@ -4,6 +4,7 @@ import ( "github.com/go-chi/chi" "github.com/go-chi/chi/middleware" "github.com/rs/cors" + "github.com/stellar/go/support/log" )