From 83c5b69d92f92256fbf5002ceceafb0b0c177c5e Mon Sep 17 00:00:00 2001 From: Ehi Enabulele Date: Sat, 12 Oct 2024 12:21:22 +0100 Subject: [PATCH] Update SDK with structured content snapshots --- api.go | 753 --------- api_test.go | 1458 ----------------- pkg/api/api.go | 57 + pkg/api/api_test.go | 245 +++ pkg/api/structuredcontent.go | 23 + .../structured-contents-snapshot.json | 18 + .../structured-contents-snapshots.json | 110 ++ 7 files changed, 453 insertions(+), 2211 deletions(-) delete mode 100644 api.go delete mode 100644 api_test.go create mode 100644 pkg/api/testdata/structured-contents-snapshot.json create mode 100644 pkg/api/testdata/structured-contents-snapshots.json diff --git a/api.go b/api.go deleted file mode 100644 index e258a23..0000000 --- a/api.go +++ /dev/null @@ -1,753 +0,0 @@ -// Package api holds an API client for Wikimedia Enterprise API(s). -package api - -import ( - "archive/tar" - "bufio" - "bytes" - "context" - "encoding/json" - "errors" - "fmt" - "io" - "net/http" - "strconv" - "strings" - "time" - - "github.com/klauspost/pgzip" -) - -// DateFormat is the date format used for the API. -const DateFormat = "2006-01-02" - -// Filter represents a filter to be applied to a dataset. -type Filter struct { - // Field specifies the field in the dataset that the filter should be applied to. - Field string `json:"field"` - - // Value specifies the value that the field should be compared to. - Value interface{} `json:"value"` -} - -// ReadCallback is a function that will be called with each Article object that is read from a batch or snapshot. -// You can return a custom error to stop the reading. -type ReadCallback func(art *Article) error - -// ReadStructuredCallback is a function that will be called with each Article object that is read from a batch or snapshot. -// You can return a custom error to stop the reading. 
-type ReadStructuredCallback func(art *StructuredContent) error - -// Request contains properties that are used to apply filters to the API. -type Request struct { - // Since is a parameter used only for streaming endpoints. - // Will pick up the reading of stream from this timestamp. - // For the articles endpoint it will be restricted to 48h. - Since *time.Time `json:"since,omitempty"` - - // Fields represents a list of fields to retrieve from the API. - // This is an optional argument. - Fields []string `json:"fields,omitempty"` - - // Filters represents a list of filters to apply to the response. - // This is an optional argument. - Filters []*Filter `json:"filters,omitempty"` - - // Limits the amount of results from the API (for now works only with Articles API). - // This is an optional argument. - Limit int `json:"limit,omitempty"` - - // Provides a way to open parallel connections to realtime streaming API. - // Allows to target subsets of partitions in each of the parallel connections. - // The max allowed number of parallel connections to realtime API is 10, i.e., the allowed range for parts is 0 through 9. - // Each part value lets one connect to 1/10 th of the total partitions. - // e.g., [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] - Parts []int `json:"parts,omitempty"` - - // Used for reconnection to realtime streaming API by passing this parameter. - // This is map of partition:latest offset consumed. - Offsets map[int]int64 `json:"offsets,omitempty"` - - // Used for reconnection to realtime streaming API by passing this parameter. - // This is map of partition:latest event.date_published consumed. - SincePerPartition map[int]time.Time ` json:"since_per_partition,omitempty"` -} - -// CodesGetter is an interface that retrieves codes from the API. -type CodesGetter interface { - GetCodes(ctx context.Context, req *Request) ([]*Code, error) -} - -// CodeGetter is an interface that retrieves a code by ID from the API. 
-type CodeGetter interface { - GetCode(ctx context.Context, idr string, req *Request) (*Code, error) -} - -// LanguagesGetter is an interface that retrieves languages from the API. -type LanguagesGetter interface { - GetLanguages(ctx context.Context, req *Request) ([]*Language, error) -} - -// LanguageGetter is an interface that retrieves a language by ID from the API. -type LanguageGetter interface { - GetLanguage(ctx context.Context, idr string, req *Request) (*Language, error) -} - -// ProjectsGetter is an interface that retrieves projects from the API. -type ProjectsGetter interface { - GetProjects(ctx context.Context, req *Request) ([]*Project, error) -} - -// ProjectGetter is an interface that retrieves a project by ID from the API. -type ProjectGetter interface { - GetProject(ctx context.Context, idr string, req *Request) (*Project, error) -} - -// NamespacesGetter is an interface that retrieves namespaces from the API. -type NamespacesGetter interface { - GetNamespaces(ctx context.Context, req *Request) ([]*Namespace, error) -} - -// NamespaceGetter is an interface that retrieves a namespace by ID from the API. -type NamespaceGetter interface { - GetNamespace(ctx context.Context, idr int, req *Request) (*Namespace, error) -} - -// BatchesGetter is an interface that retrieves batches from the API. -type BatchesGetter interface { - GetBatches(ctx context.Context, dte *time.Time, req *Request) ([]*Batch, error) -} - -// BatchGetter is an interface that retrieves a realtime batch by ID from the API. -type BatchGetter interface { - GetBatch(ctx context.Context, dte *time.Time, idr string, req *Request) (*Batch, error) -} - -// BatchHeader is an interface that retrieves the header of a realtime batch by ID from the API. -type BatchHeader interface { - HeadBatch(ctx context.Context, dte *time.Time, idr string) (*Headers, error) -} - -// BatchReader is an interface that reads a realtime batch data by ID from the API. 
-type BatchReader interface { - ReadBatch(ctx context.Context, dte *time.Time, idr string, cbk ReadCallback) error -} - -// BatchDownloader is an interface that downloads a realtime batch `tar.gz` by ID file from the API. -type BatchDownloader interface { - DownloadBatch(ctx context.Context, dte *time.Time, idr string, wsk io.WriteSeeker) error -} - -// SnapshotsGetter is an interface for getting multiple snapshots. -type SnapshotsGetter interface { - GetSnapshots(ctx context.Context, req *Request) ([]*Snapshot, error) -} - -// SnapshotGetter is an interface for getting a single snapshot by ID. -type SnapshotGetter interface { - GetSnapshot(ctx context.Context, idr string, req *Request) (*Snapshot, error) -} - -// SnapshotHeader is an interface for getting the headers of a single snapshot by ID. -type SnapshotHeader interface { - HeadSnapshot(ctx context.Context, idr string) (*Headers, error) -} - -// SnapshotDownloader is an interface for downloading a single snapshot by ID to a writer. -type SnapshotDownloader interface { - DownloadSnapshot(ctx context.Context, idr string, wsk io.WriteSeeker) error -} - -// SnapshotReader is an interface for reading the contents of a single snapshot by ID with a callback function. -type SnapshotReader interface { - ReadSnapshot(ctx context.Context, idr string, cbk ReadCallback) error -} - -// ArticlesGetter is an interface for getting a lits of articles by name. -type ArticlesGetter interface { - GetArticles(ctx context.Context, nme string, req *Request) ([]*Article, error) -} - -// StructuredContentsGetter is an interface for getting a lits of structured contents by name. -type StructuredContentsGetter interface { - GetStructuredContents(ctx context.Context, nme string, req *Request) ([]*StructuredContent, error) -} - -// StructuredSnapshotsGetter is an interface for getting multiple snapshots. 
-type StructuredSnapshotsGetter interface { - GetStructuredSnapshots(ctx context.Context, req *Request) ([]*Snapshot, error) -} - -// StructuredSnapshotGetter is an interface for getting a single snapshot by ID. -type StructuredSnapshotGetter interface { - GetStructuredSnapshot(ctx context.Context, idr string, req *Request) (*Snapshot, error) -} - -// StructuredSnapshotHeader is an interface for getting the headers of a single snapshot by ID. -type StructuredSnapshotHeader interface { - HeadStructuredSnapshot(ctx context.Context, idr string) (*Headers, error) -} - -// StructuredSnapshotDownloader is an interface for downloading a single snapshot by ID to a writer. -type StructuredSnapshotDownloader interface { - DownloadStructuredSnapshot(ctx context.Context, idr string, wsk io.WriteSeeker) error -} - -// SnapshotReader is an interface for reading the contents of a single snapshot by ID with a callback function. -type StructuredSnapshotReader interface { - ReadStructuredSnapshot(ctx context.Context, idr string, cbk ReadStructuredCallback) error -} - -// ArticlesStreamer is an interface for getting all the article changes in realtime. -type ArticlesStreamer interface { - StreamArticles(ctx context.Context, req *Request, cbk ReadCallback) error -} - -// AllReader is an interface for reading all the contents of a reader with a callback function. -type AllReader interface { - ReadAll(ctx context.Context, rdr io.Reader, cbk ReadCallback) error -} - -// AccessTokenSetter is an interface for setting an access token. -type AccessTokenSetter interface { - SetAccessToken(tkn string) -} - -// API interface tha encapsulates the whole functionality of the client. -// Can be used with composition in unit testing. 
-type API interface { - AllReader - AccessTokenSetter - CodesGetter - CodeGetter - LanguagesGetter - LanguageGetter - ProjectsGetter - ProjectGetter - NamespacesGetter - NamespaceGetter - BatchesGetter - BatchGetter - BatchHeader - BatchReader - BatchDownloader - SnapshotsGetter - SnapshotGetter - SnapshotHeader - SnapshotReader - SnapshotDownloader - ArticlesGetter - StructuredContentsGetter - StructuredSnapshotsGetter - StructuredSnapshotGetter - StructuredSnapshotHeader - StructuredSnapshotReader - StructuredSnapshotDownloader - ArticlesStreamer -} - -// NewClient returns a new instance of the Client that implements the API interface. -// The function takes in optional functional options that allow the caller to configure -// the client with custom settings. -func NewClient(ops ...func(clt *Client)) API { - clt := &Client{ - HTTPClient: &http.Client{}, - DownloadMinChunkSize: 5242880, - DownloadChunkSize: 5242880 * 5, - DownloadConcurrency: 10, - ScannerBufferSize: 20971520, - UserAgent: "", - BaseURL: "https://api.enterprise.wikimedia.com/", - RealtimeURL: "https://realtime.enterprise.wikimedia.com/", - } - - for _, opt := range ops { - opt(clt) - } - - return clt -} - -// Client is a struct that represents an HTTP client used to interact with the API. -type Client struct { - // HTTPClient is the HTTP client used to send requests. - HTTPClient *http.Client - - // UserAgent is the user-agent header value sent with each request. - UserAgent string - - // BaseUrl is the base URL for all API requests. - BaseURL string - - // RealtimeURL is the base URL for all realtime API requests. - RealtimeURL string - - // AccessToken is the access token used to authenticate requests. - AccessToken string - - // DownloadMinChunkSize is the minimum chunk size used for downloading resources. - DownloadMinChunkSize int - - // DownloadChunkSize is the chunk size used for downloading resources. 
- DownloadChunkSize int - - // DownloadConcurrency is the number of simultaneous downloads allowed. - DownloadConcurrency int - - // ScannerBufferSize is the buffer size for the scanner when it reads from the API. - ScannerBufferSize int -} - -func (c *Client) newRequest(ctx context.Context, url string, mtd string, pth string, req *Request) (*http.Request, error) { - dta := []byte{} - - if req != nil { - bdy, err := json.Marshal(req) - - if err != nil { - return nil, err - } - - dta = bdy - } - - hrq, err := http.NewRequestWithContext(ctx, mtd, fmt.Sprintf("%sv2/%s", url, pth), bytes.NewReader(dta)) - - if err != nil { - return nil, err - } - - hrq.Header.Set("User-Agent", c.UserAgent) - hrq.Header.Set("Content-Type", "application/json") - hrq.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.AccessToken)) - - return hrq, nil -} - -func (c *Client) do(hrq *http.Request) (*http.Response, error) { - res, err := c.HTTPClient.Do(hrq) - - if err != nil { - return nil, err - } - - if res.StatusCode < http.StatusOK || res.StatusCode > http.StatusIMUsed { - dta, err := io.ReadAll(res.Body) - defer res.Body.Close() - - if err != nil { - return nil, err - } - - if len(string(dta)) == 0 { - return nil, errors.New(res.Status) - } - - return nil, errors.New(string(dta)) - } - - return res, nil -} - -func (c *Client) getEntity(ctx context.Context, req *Request, pth string, val interface{}) error { - hrq, err := c.newRequest(ctx, c.BaseURL, http.MethodPost, pth, req) - - if err != nil { - return err - } - - res, err := c.do(hrq) - - if err != nil { - return err - } - - defer res.Body.Close() - return json.NewDecoder(res.Body).Decode(val) -} - -func (c *Client) readLoop(ctx context.Context, rdr io.Reader, cbk ReadCallback) error { - scn := bufio.NewScanner(rdr) - scn.Buffer([]byte{}, c.ScannerBufferSize) - - for scn.Scan() { - art := new(Article) - - if err := json.Unmarshal(scn.Bytes(), art); err != nil { - return err - } - - if err := cbk(art); err != nil { - return err - } 
- } - - return nil -} - -func (c *Client) readEntity(ctx context.Context, pth string, cbk ReadCallback) error { - hrq, err := c.newRequest(ctx, c.BaseURL, http.MethodGet, pth, nil) - - if err != nil { - return err - } - - res, err := c.do(hrq) - - if err != nil { - return err - } - - defer res.Body.Close() - return c.ReadAll(ctx, res.Body, cbk) -} - -func (c *Client) headEntity(ctx context.Context, pth string) (*Headers, error) { - hrq, err := c.newRequest(ctx, c.BaseURL, http.MethodHead, pth, nil) - - if err != nil { - return nil, err - } - - res, err := c.do(hrq) - - if err != nil { - return nil, err - } - - hdr := &Headers{ - ETag: strings.Trim(res.Header.Get("ETag"), "\""), - ContentType: res.Header.Get("Content-Type"), - AcceptRanges: res.Header.Get("Accept-Ranges"), - } - - if lmf := res.Header.Get("Last-Modified"); len(lmf) > 0 { - lmd, err := time.Parse(time.RFC1123, lmf) - - if err != nil { - return nil, err - } - - hdr.LastModified = &lmd - } - - if ctl := res.Header.Get("Content-Length"); len(ctl) > 0 { - cti, err := strconv.Atoi(ctl) - - if err != nil { - return nil, err - } - - hdr.ContentLength = cti - } - - return hdr, nil -} - -type chunk struct { - start int - end int - data []byte -} - -func (c *Client) downloadEntity(ctx context.Context, pth string, wrr io.WriteSeeker) error { - hds, err := c.headEntity(ctx, pth) - - if err != nil { - return err - } - - csz := c.DownloadChunkSize - - if hds.ContentLength < c.DownloadMinChunkSize { - csz = c.DownloadMinChunkSize - } - - cks := []*chunk{} - - for i := 0; true; i++ { - cnk := &chunk{ - start: i * csz, - end: (i * csz) + csz, - } - - if cnk.end > hds.ContentLength { - cnk.end = hds.ContentLength - } - - cks = append(cks, cnk) - - if cnk.end == hds.ContentLength { - break - } - } - - ers := make(chan error, len(cks)*2) - cds := make(chan *chunk, len(cks)) - - go func() { - for cnk := range cds { - if _, err := wrr.Seek(int64(cnk.start), 0); err != nil { - ers <- err - return - } - - if _, err := 
io.CopyN(wrr, bytes.NewReader(cnk.data), int64(cnk.end-cnk.start)); err != nil { - ers <- err - return - } - - ers <- nil - } - }() - - dcs := c.DownloadConcurrency - smr := make(chan struct{}, dcs) - - for _, cnk := range cks { - go func(cnk *chunk) { - smr <- struct{}{} - defer func() { - ers <- nil - <-smr - }() - - hrq, err := c.newRequest(ctx, c.BaseURL, http.MethodGet, pth, nil) - hrq.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", cnk.start, cnk.end)) - - if err != nil { - ers <- err - return - } - - res, err := c.do(hrq) - - if err != nil { - ers <- err - return - } - - defer res.Body.Close() - cnk.data, err = io.ReadAll(res.Body) - - if err != nil { - ers <- err - return - } - - cds <- cnk - }(cnk) - } - - for i := 0; i < cap(ers); i++ { - if err := <-ers; err != nil { - return err - } - } - - close(cds) - - return nil -} - -func (c *Client) subscribeToEntity(ctx context.Context, pth string, req *Request, cbk ReadCallback) error { - hrq, err := c.newRequest(ctx, c.RealtimeURL, http.MethodGet, pth, req) - - if err != nil { - return err - } - - hrq.Header.Set("Cache-Control", "no-cache") - hrq.Header.Set("Accept", "application/x-ndjson") - hrq.Header.Set("Connection", "keep-alive") - res, err := c.do(hrq) - - if err != nil { - return err - } - - defer res.Body.Close() - return c.readLoop(ctx, res.Body, cbk) -} - -// ReadAll reads the contents of the given io.Reader and calls the given ReadCallback function -// with each chunk of data read. -func (c *Client) ReadAll(ctx context.Context, rdr io.Reader, cbk ReadCallback) error { - gzr, err := pgzip.NewReader(rdr) - - if err != nil { - return err - } - - trr := tar.NewReader(gzr) - - for { - _, err := trr.Next() - - if err == io.EOF { - break - } - - if err != nil { - return err - } - - if err := c.readLoop(ctx, trr, cbk); err != nil { - return err - } - } - - return nil -} - -// SetAccessToken sets the access token for the client. 
-func (c *Client) SetAccessToken(tkn string) { - c.AccessToken = tkn -} - -// GetCodes retrieves a list of codes, and returns an error if any. -func (c *Client) GetCodes(ctx context.Context, req *Request) ([]*Code, error) { - cds := []*Code{} - return cds, c.getEntity(ctx, req, "codes", &cds) -} - -// GetCode retrieves a code by ID, and returns an error if any. -func (c *Client) GetCode(ctx context.Context, idr string, req *Request) (*Code, error) { - cde := new(Code) - return cde, c.getEntity(ctx, req, fmt.Sprintf("codes/%s", idr), cde) -} - -// GetLanguages retrieves a list of languages, and returns an error if any. -func (c *Client) GetLanguages(ctx context.Context, req *Request) ([]*Language, error) { - lgs := []*Language{} - return lgs, c.getEntity(ctx, req, "languages", &lgs) -} - -// GetLanguage retrieves a language by ID, and returns an error if any. -func (c *Client) GetLanguage(ctx context.Context, idr string, req *Request) (*Language, error) { - lng := new(Language) - return lng, c.getEntity(ctx, req, fmt.Sprintf("languages/%s", idr), lng) -} - -// GetProjects retrieves a list of projects, and returns an error if any. -func (c *Client) GetProjects(ctx context.Context, req *Request) ([]*Project, error) { - prs := []*Project{} - return prs, c.getEntity(ctx, req, "projects", &prs) -} - -// GetProject retrieves a project by ID, and returns an error if any. -func (c *Client) GetProject(ctx context.Context, idr string, req *Request) (*Project, error) { - prj := new(Project) - return prj, c.getEntity(ctx, req, fmt.Sprintf("projects/%s", idr), prj) -} - -// GetNamespaces retrieves a list of namespaces, and returns an error if any. -func (c *Client) GetNamespaces(ctx context.Context, req *Request) ([]*Namespace, error) { - nss := []*Namespace{} - return nss, c.getEntity(ctx, req, "namespaces", &nss) -} - -// GetNamespaces retrieves a namespaces by ID, and returns an error if any. 
-func (c *Client) GetNamespace(ctx context.Context, idr int, req *Request) (*Namespace, error) { - nsp := new(Namespace) - return nsp, c.getEntity(ctx, req, fmt.Sprintf("namespaces/%d", idr), nsp) -} - -// GetBatches retrieves a list of batches for a specific date and request, and returns an error if any. -func (c *Client) GetBatches(ctx context.Context, dte *time.Time, req *Request) ([]*Batch, error) { - bts := []*Batch{} - return bts, c.getEntity(ctx, req, fmt.Sprintf("batches/%s", dte.Format(DateFormat)), &bts) -} - -// GetBatch retrieves a single batch for a specific date and ID, and returns an error if any. -func (c *Client) GetBatch(ctx context.Context, dte *time.Time, idr string, req *Request) (*Batch, error) { - bth := new(Batch) - return bth, c.getEntity(ctx, req, fmt.Sprintf("batches/%s/%s", dte.Format(DateFormat), idr), bth) -} - -// HeadBatch retrieves only the headers of a single batch for a specific date and ID, and returns an error if any. -func (c *Client) HeadBatch(ctx context.Context, dte *time.Time, idr string) (*Headers, error) { - return c.headEntity(ctx, fmt.Sprintf("batches/%s/%s/download", dte.Format(DateFormat), idr)) -} - -// ReadBatch reads the contents of a single batch for a specific date and ID, and invokes the specified callback function for each chunk read. -func (c *Client) ReadBatch(ctx context.Context, dte *time.Time, idr string, cbk ReadCallback) error { - return c.readEntity(ctx, fmt.Sprintf("batches/%s/%s/download", dte.Format(DateFormat), idr), cbk) -} - -// DownloadBatch downloads the contents of a single batch for a specific date and ID, and writes the data to the specified WriteSeeker. -func (c *Client) DownloadBatch(ctx context.Context, dte *time.Time, idr string, wsk io.WriteSeeker) error { - return c.downloadEntity(ctx, fmt.Sprintf("batches/%s/%s/download", dte.Format(DateFormat), idr), wsk) -} - -// GetSnapshots retrieves a list of all snapshots and returns an error if any. 
-func (c *Client) GetSnapshots(ctx context.Context, req *Request) ([]*Snapshot, error) { - sps := []*Snapshot{} - return sps, c.getEntity(ctx, req, "snapshots", &sps) -} - -// GetSnapshot retrieves a single snapshot for a specific ID and returns an error if any. -func (c *Client) GetSnapshot(ctx context.Context, idr string, req *Request) (*Snapshot, error) { - snp := new(Snapshot) - return snp, c.getEntity(ctx, req, fmt.Sprintf("snapshots/%s", idr), snp) -} - -// HeadSnapshot retrieves only the headers of a single snapshot for a specific ID, and returns an error if any. -func (c *Client) HeadSnapshot(ctx context.Context, idr string) (*Headers, error) { - return c.headEntity(ctx, fmt.Sprintf("snapshots/%s/download", idr)) -} - -// ReadSnapshot reads the contents of a single snapshots for a specific ID, and invokes the specified callback function for each chunk read. -func (c *Client) ReadSnapshot(ctx context.Context, idr string, cbk ReadCallback) error { - return c.readEntity(ctx, fmt.Sprintf("snapshots/%s/download", idr), cbk) -} - -// DownloadSnapshot downloads the contents of a single snapshot for a specific ID, and writes the data to the specified WriteSeeker. -func (c *Client) DownloadSnapshot(ctx context.Context, idr string, wsk io.WriteSeeker) error { - return c.downloadEntity(ctx, fmt.Sprintf("snapshots/%s/download", idr), wsk) -} - -// GetArticles retrieves articles from the API based on the given name and request parameters. -func (c *Client) GetArticles(ctx context.Context, nme string, req *Request) ([]*Article, error) { - ats := []*Article{} - return ats, c.getEntity(ctx, req, fmt.Sprintf("articles/%s", nme), &ats) -} - -// GetStructuredContents retrieves structured contents from the API based on the given name and request parameters. 
-func (c *Client) GetStructuredContents(ctx context.Context, nme string, req *Request) ([]*StructuredContent, error) { - ats := []*StructuredContent{} - return ats, c.getEntity(ctx, req, fmt.Sprintf("structured-contents/%s", nme), &ats) -} - -// GetStructuredSnapshots retrieves a list of all snapshots and returns an error if any. -func (c *Client) GetStructuredSnapshots(ctx context.Context, req *Request) ([]*Snapshot, error) { - sps := []*Snapshot{} - return sps, c.getEntity(ctx, req, "snapshots/structured-contents", &sps) -} - -// GetStructuredSnapshot retrieves a single snapshot for a specific ID and returns an error if any. -func (c *Client) GetStructuredSnapshot(ctx context.Context, idr string, req *Request) (*Snapshot, error) { - snp := new(Snapshot) - return snp, c.getEntity(ctx, req, fmt.Sprintf("snapshots/structured-contents/%s", idr), snp) -} - -// HeadStructuredSnapshot retrieves only the headers of a single snapshot for a specific ID, and returns an error if any. -func (c *Client) HeadStructuredSnapshot(ctx context.Context, idr string) (*Headers, error) { - return c.headEntity(ctx, fmt.Sprintf("snapshots/structured-contents/%s/download", idr)) -} - -// ReadStructuredSnapshot reads the contents of a single snapshots for a specific ID, and invokes the specified callback function for each chunk read. -func (c *Client) ReadStructuredSnapshot(ctx context.Context, idr string, cbk ReadCallback) error { - return c.readEntity(ctx, fmt.Sprintf("snapshots/structured-contents/%s/download", idr), cbk) -} - -// DownloadStructuredSnapshot downloads the contents of a single snapshot for a specific ID, and writes the data to the specified WriteSeeker. 
-func (c *Client) DownloadStructuredSnapshot(ctx context.Context, idr string, wsk io.WriteSeeker) error { - return c.downloadEntity(ctx, fmt.Sprintf("snapshots/structured-contents/%s/download", idr), wsk) -} - -// StreamArticles streams all available articles from the server and applies a callback function to each article -// as they arrive. The callback function must implement the ReadCallback interface. -func (c *Client) StreamArticles(ctx context.Context, req *Request, cbk ReadCallback) error { - return c.subscribeToEntity(ctx, "articles", req, cbk) -} diff --git a/api_test.go b/api_test.go deleted file mode 100644 index 7646984..0000000 --- a/api_test.go +++ /dev/null @@ -1,1458 +0,0 @@ -package api_test - -import ( - "context" - "embed" - "fmt" - "io" - "log" - "net/http" - "net/http/httptest" - "os" - "testing" - "time" - - "github.com/stretchr/testify/suite" - "github.com/wikimedia-enterprise/wme-sdk-go/pkg/api" -) - -//go:embed testdata/* -var testData embed.FS - -type newClientTestSuite struct { - suite.Suite -} - -func (s *newClientTestSuite) TestNewClient() { - clt := api.NewClient().(*api.Client) - - s.NotNil(clt) - s.NotNil(clt.HTTPClient) - s.NotZero(clt.DownloadMinChunkSize) - s.NotZero(clt.DownloadChunkSize) - s.NotZero(clt.DownloadConcurrency) - s.NotZero(clt.ScannerBufferSize) - s.NotZero(clt.BaseURL) - s.NotZero(clt.RealtimeURL) -} - -func (s *newClientTestSuite) TestNewClientWithOpts() { - httpClient := new(http.Client) - downloadMinChunkSize := 100 - downloadChunkSize := 25 - downloadConcurrency := 2 - scannerBufferSize := 100 - baseURL := "https://foo.bar" - realtimeURL := "https://foo.bar/realtime" - opt := func(clt *api.Client) { - clt.HTTPClient = httpClient - clt.DownloadMinChunkSize = downloadMinChunkSize - clt.DownloadChunkSize = downloadChunkSize - clt.DownloadConcurrency = downloadConcurrency - clt.ScannerBufferSize = scannerBufferSize - clt.BaseURL = baseURL - clt.RealtimeURL = realtimeURL - } - clt := api.NewClient(opt).(*api.Client) 
- - s.NotNil(clt) - s.Equal(httpClient, clt.HTTPClient) - s.Equal(downloadMinChunkSize, clt.DownloadMinChunkSize) - s.Equal(downloadChunkSize, clt.DownloadChunkSize) - s.Equal(downloadConcurrency, clt.DownloadConcurrency) - s.Equal(scannerBufferSize, clt.ScannerBufferSize) - s.Equal(baseURL, clt.BaseURL) - s.Equal(realtimeURL, clt.RealtimeURL) -} - -func TestNewClient(t *testing.T) { - suite.Run(t, new(newClientTestSuite)) -} - -type readAllTestSuite struct { - suite.Suite - ctx context.Context - rcr io.ReadCloser - clt api.API -} - -func (s *readAllTestSuite) SetupSuite() { - var err error - s.rcr, err = testData.Open("testdata/simplewiki_namespace_0.tar.gz") - s.NoError(err) - - s.clt = api.NewClient() -} - -func (s *readAllTestSuite) TearDownSuite() { - s.rcr.Close() -} - -func (s *readAllTestSuite) TestReadAll() { - nmc := 0 - err := s.clt.ReadAll(s.ctx, s.rcr, func(art *api.Article) error { - s.NotEmpty(art.Name) - s.NotEmpty(art.Identifier) - nmc++ - return nil - }) - - s.NoError(err) - s.NotZero(nmc) -} - -func TestReadAll(t *testing.T) { - suite.Run(t, new(readAllTestSuite)) -} - -type setAccessTokenTestSuite struct { - suite.Suite - clt *api.Client - tkn string -} - -func (s *setAccessTokenTestSuite) SetupTest() { - s.tkn = "foo" - s.clt = new(api.Client) -} - -func (s *setAccessTokenTestSuite) TestSetAccessToken() { - s.clt.SetAccessToken(s.tkn) - - s.Equal(s.tkn, s.clt.AccessToken) -} - -func TestSetAccessToken(t *testing.T) { - suite.Run(t, new(setAccessTokenTestSuite)) -} - -type baseEntityTestSuite struct { - suite.Suite - sts int - fph string - pth string - ctx context.Context - req *api.Request - srv *httptest.Server - clt api.API - mtd string -} - -func (s *baseEntityTestSuite) SetupSuite() { - rtr := http.NewServeMux() - var hdr func(w http.ResponseWriter, r *http.Request) - - switch s.mtd { - case http.MethodHead: - hdr = func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Length", "4827640") - w.Header().Set("Content-Type", 
"binary/octet-stream") - w.Header().Set("ETag", "528262227e37be50594b5a0ac0bcb752") - w.Header().Set("Last-Modified", "Mon, 04 Sep 2023 11:08:50 UTC") - w.WriteHeader(s.sts) - } - case http.MethodGet: - default: - fle, err := testData.Open(s.fph) - - if err != nil { - log.Fatal(err) - } - - dta, err := io.ReadAll(fle) - - if err != nil { - log.Fatal(err) - } - - hdr = func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(s.sts) - w.Header().Set("Content-Type", "application/octet-stream") - _, _ = w.Write(dta) - } - } - - rtr.HandleFunc(fmt.Sprintf("/v2/%s", s.pth), hdr) - - s.ctx = context.Background() - s.req = new(api.Request) - s.srv = httptest.NewServer(rtr) - s.clt = api.NewClient(func(clt *api.Client) { - clt.BaseURL = fmt.Sprintf("%s/", s.srv.URL) - clt.RealtimeURL = fmt.Sprintf("%s/", s.srv.URL) - }) -} - -func (s *baseEntityTestSuite) TearDownSuite() { - s.srv.Close() -} - -type getCodesTestSuite struct { - baseEntityTestSuite -} - -func (s *getCodesTestSuite) SetupSuite() { - s.pth = "codes" - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getCodesTestSuite) TestGetCodes() { - cds, err := s.clt.GetCodes(s.ctx, s.req) - - for _, cde := range cds { - s.NotEmpty(cde.Identifier) - s.NotEmpty(cde.Name) - s.NotEmpty(cde.Description) - } - - if s.sts != http.StatusOK { - s.Empty(cds) - s.Error(err) - } else { - s.NotEmpty(cds) - s.NoError(err) - } -} - -func TestGetCodes(t *testing.T) { - for _, tcs := range []*getCodesTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/codes.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getCodeTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *getCodeTestSuite) SetupSuite() { - s.idr = "simplewiki_namepace_0" - s.pth = fmt.Sprintf("codes/%s", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getCodeTestSuite) 
TestGetCode() { - cde, err := s.clt.GetCode(s.ctx, s.idr, s.req) - - if s.sts != http.StatusOK { - s.Empty(cde.Identifier) - s.Empty(cde.Name) - s.Empty(cde.Description) - s.Error(err) - } else { - s.NotEmpty(cde.Identifier) - s.NotEmpty(cde.Name) - s.NotEmpty(cde.Description) - s.NoError(err) - } -} - -func TestGetCode(t *testing.T) { - for _, tcs := range []*getCodeTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/code.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getLanguagesTestSuite struct { - baseEntityTestSuite -} - -func (s *getLanguagesTestSuite) SetupSuite() { - s.pth = "languages" - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getLanguagesTestSuite) TestGetLanguages() { - lgs, err := s.clt.GetLanguages(s.ctx, s.req) - - for _, lng := range lgs { - s.NotEmpty(lng.Identifier) - s.NotEmpty(lng.Name) - s.NotEmpty(lng.AlternateName) - s.NotEmpty(lng.Direction) - } - - if s.sts != http.StatusOK { - s.Empty(lgs) - s.Error(err) - } else { - s.NotEmpty(lgs) - s.NoError(err) - } -} - -func TestGetLanguages(t *testing.T) { - for _, tcs := range []*getLanguagesTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/languages.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getLanguageTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *getLanguageTestSuite) SetupSuite() { - s.idr = "en" - s.pth = fmt.Sprintf("languages/%s", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getLanguageTestSuite) TestGetLanguage() { - lng, err := s.clt.GetLanguage(s.ctx, s.idr, s.req) - - if s.sts != http.StatusOK { - s.Empty(lng.Identifier) - s.Empty(lng.Name) - s.Empty(lng.AlternateName) - 
s.Empty(lng.Direction) - s.Error(err) - } else { - s.NotEmpty(lng.Identifier) - s.NotEmpty(lng.Name) - s.NotEmpty(lng.AlternateName) - s.NotEmpty(lng.Direction) - s.NoError(err) - } -} - -func TestGetLanguage(t *testing.T) { - for _, tcs := range []*getLanguageTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/language.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getProjectsTestSuite struct { - baseEntityTestSuite -} - -func (s *getProjectsTestSuite) SetupSuite() { - s.pth = "projects" - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getProjectsTestSuite) TestGetProjects() { - prs, err := s.clt.GetProjects(s.ctx, s.req) - - for _, prj := range prs { - s.NotEmpty(prj.Name) - s.NotEmpty(prj.Identifier) - s.NotEmpty(prj.URL) - s.NotEmpty(prj.Code) - s.NotNil(prj.InLanguage) - s.NotEmpty(prj.InLanguage.Identifier) - } - - if s.sts != http.StatusOK { - s.Empty(prs) - s.Error(err) - } else { - s.NotEmpty(prs) - s.NoError(err) - } -} - -func TestGetProjects(t *testing.T) { - for _, tcs := range []*getProjectsTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/projects.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getProjectTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *getProjectTestSuite) SetupSuite() { - s.idr = "enwiki" - s.pth = fmt.Sprintf("projects/%s", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getProjectTestSuite) TestGetProject() { - prj, err := s.clt.GetProject(s.ctx, s.idr, s.req) - - if s.sts != http.StatusOK { - s.Empty(prj.Name) - s.Empty(prj.Identifier) - s.Empty(prj.URL) - s.Empty(prj.Code) - s.Nil(prj.InLanguage) - s.Error(err) - } else { - s.NotEmpty(prj.Name) - 
s.NotEmpty(prj.Identifier) - s.NotEmpty(prj.URL) - s.NotEmpty(prj.Code) - s.NotNil(prj.InLanguage) - s.NotEmpty(prj.InLanguage.Identifier) - s.NoError(err) - } -} - -func TestGetProject(t *testing.T) { - for _, tcs := range []*getProjectTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/project.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getNamespacesTestSuite struct { - baseEntityTestSuite -} - -func (s *getNamespacesTestSuite) SetupSuite() { - s.pth = "namespaces" - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getNamespacesTestSuite) TestGetNamespaces() { - nss, err := s.clt.GetNamespaces(s.ctx, s.req) - - for _, nsp := range nss { - if nsp.Identifier != 0 { - s.NotEmpty(nsp.Identifier) - } - - s.NotEmpty(nsp.Name) - s.NotEmpty(nsp.Description) - } - - if s.sts != http.StatusOK { - s.Empty(nss) - s.Error(err) - } else { - s.NotEmpty(nss) - s.NoError(err) - } -} - -func TestGetNamespaces(t *testing.T) { - for _, tcs := range []*getNamespacesTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/namespaces.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getNamespaceTestSuite struct { - baseEntityTestSuite - idr int -} - -func (s *getNamespaceTestSuite) SetupSuite() { - s.idr = 14 - s.pth = fmt.Sprintf("namespaces/%d", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getNamespaceTestSuite) TestGetNamespace() { - nsp, err := s.clt.GetNamespace(s.ctx, s.idr, s.req) - - if s.sts != http.StatusOK { - s.Empty(nsp.Identifier) - s.Empty(nsp.Name) - s.Empty(nsp.Description) - s.Error(err) - } else { - s.NotEmpty(nsp.Identifier) - s.NotEmpty(nsp.Name) - s.NotEmpty(nsp.Description) - s.NoError(err) - } -} - -func 
TestGetNamespace(t *testing.T) { - for _, tcs := range []*getNamespaceTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/namespace.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getBatchesTestSuite struct { - baseEntityTestSuite - dte *time.Time -} - -func (s *getBatchesTestSuite) SetupSuite() { - dtn := time.Now() - s.dte = &dtn - s.pth = fmt.Sprintf("batches/%s", s.dte.Format(api.DateFormat)) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getBatchesTestSuite) TestGetBatches() { - bts, err := s.clt.GetBatches(s.ctx, s.dte, s.req) - - for _, bth := range bts { - s.NotEmpty(bth.Identifier) - s.NotEmpty(bth.Version) - s.NotEmpty(bth.DateModified) - s.NotEmpty(bth.IsPartOf) - s.NotEmpty(bth.InLanguage) - s.NotNil(bth.Namespace) - s.NotEmpty(bth.Size) - } - - if s.sts != http.StatusOK { - s.Empty(bts) - s.Error(err) - } else { - s.NotEmpty(bts) - s.NoError(err) - } -} - -func TestGetBatches(t *testing.T) { - for _, tcs := range []*getBatchesTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/batches.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getBatchTestSuite struct { - baseEntityTestSuite - dte *time.Time - idr string -} - -func (s *getBatchTestSuite) SetupSuite() { - dtn := time.Now() - s.dte = &dtn - s.idr = "simplewiki_namespace_0" - s.pth = fmt.Sprintf("batches/%s/%s", s.dte.Format(api.DateFormat), s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getBatchTestSuite) TestGetBatch() { - bth, err := s.clt.GetBatch(s.ctx, s.dte, s.idr, s.req) - - if s.sts != http.StatusOK { - s.Empty(bth) - s.Error(err) - } else { - s.NotEmpty(bth.Identifier) - s.NotEmpty(bth.Version) - s.NotEmpty(bth.DateModified) - 
s.NotEmpty(bth.IsPartOf) - s.NotEmpty(bth.InLanguage) - s.NotNil(bth.Namespace) - s.NotEmpty(bth.Size) - s.NoError(err) - } -} - -func TestGetBatch(t *testing.T) { - for _, tcs := range []*getBatchTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/batch.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type headBatchTestSuite struct { - baseEntityTestSuite - idr string - dte *time.Time -} - -func (s *headBatchTestSuite) SetupSuite() { - dtn := time.Now() - s.dte = &dtn - s.idr = "simplewiki_namespace_0" - s.pth = fmt.Sprintf("batches/%s/%s/download", s.dte.Format(api.DateFormat), s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *headBatchTestSuite) TestHeadBatch() { - bth, err := s.clt.HeadBatch(s.ctx, s.dte, s.idr) - - if s.sts != http.StatusOK { - s.Empty(bth) - s.Error(err) - } else { - s.NotEmpty(bth) - s.NotEmpty(bth.ContentLength) - s.NotEmpty(bth.ContentType) - s.NotEmpty(bth.ETag) - s.NotEmpty(bth.LastModified) - s.NoError(err) - } -} - -func TestHeadBatch(t *testing.T) { - for _, tcs := range []*headBatchTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - mtd: http.MethodHead, - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type downloadBatchTestSuite struct { - baseEntityTestSuite - idr string - dte *time.Time -} - -func (s *downloadBatchTestSuite) SetupSuite() { - dtn := time.Now() - s.dte = &dtn - s.idr = "simplewiki_namespace_0" - s.pth = fmt.Sprintf("batches/%s/%s/download", s.dte.Format(api.DateFormat), s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *downloadBatchTestSuite) TestDownloadBatch() { - tmf, err := os.CreateTemp("", "bth_tmp.tar.gz") - - if err != nil { - log.Fatal(err) - } - - defer tmf.Close() - err = 
s.clt.DownloadBatch(s.ctx, s.dte, s.idr, tmf) - - if s.sts != http.StatusOK { - s.Error(err) - } else { - s.NoError(err) - } -} - -func TestDownloadBatch(t *testing.T) { - for _, tcs := range []*downloadBatchTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/simplewiki_namespace_0.tar.gz", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type readBathTestSuite struct { - baseEntityTestSuite - idr string - dte *time.Time -} - -func (s *readBathTestSuite) SetupSuite() { - dtn := time.Now() - s.dte = &dtn - s.idr = "simplewiki_namespace_0" - s.pth = fmt.Sprintf("batches/%s/%s/download", s.dte.Format(api.DateFormat), s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *readBathTestSuite) TestReadBatch() { - nmc := 0 - err := s.clt.ReadBatch(s.ctx, s.dte, s.idr, func(art *api.Article) error { - s.NotEmpty(art.Name) - s.NotEmpty(art.Identifier) - nmc++ - return nil - }) - - if s.sts != http.StatusOK { - s.Error(err) - s.Zero(nmc) - } else { - s.NoError(err) - s.NotZero(nmc) - } -} - -func TestReadBatch(t *testing.T) { - for _, tcs := range []*readBathTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/simplewiki_namespace_0.tar.gz", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getSnapshotsTestSuite struct { - baseEntityTestSuite -} - -func (s *getSnapshotsTestSuite) SetupSuite() { - s.pth = "snapshots" - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getSnapshotsTestSuite) TestGetSnapshots() { - sps, err := s.clt.GetSnapshots(s.ctx, s.req) - - for _, spt := range sps { - s.NotEmpty(spt.Identifier) - s.NotEmpty(spt.Version) - s.NotEmpty(spt.DateModified) - s.NotEmpty(spt.IsPartOf) - s.NotEmpty(spt.InLanguage) - s.NotNil(spt.Namespace) - 
s.NotEmpty(spt.Size) - } - - if s.sts != http.StatusOK { - s.Empty(sps) - s.Error(err) - } else { - s.NotEmpty(sps) - s.NoError(err) - } -} - -func TestGetSnapshots(t *testing.T) { - for _, tcs := range []*getSnapshotsTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/snapshots.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getSnapshotTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *getSnapshotTestSuite) SetupSuite() { - s.idr = "simplewiki_namespace_0" - s.pth = fmt.Sprintf("snapshots/%s", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getSnapshotTestSuite) TestGetSnapshot() { - spt, err := s.clt.GetSnapshot(s.ctx, s.idr, s.req) - - if s.sts != http.StatusOK { - s.Empty(spt.Identifier) - s.Empty(spt.Version) - s.Empty(spt.DateModified) - s.Empty(spt.IsPartOf) - s.Empty(spt.InLanguage) - s.Nil(spt.Namespace) - s.Empty(spt.Size) - s.Error(err) - } else { - s.NotEmpty(spt.Identifier) - s.NotEmpty(spt.Version) - s.NotEmpty(spt.DateModified) - s.NotEmpty(spt.IsPartOf) - s.NotEmpty(spt.InLanguage) - s.NotNil(spt.Namespace) - s.NotEmpty(spt.Size) - s.NoError(err) - } -} - -func TestGetSnapshot(t *testing.T) { - for _, tcs := range []*getSnapshotTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/snapshot.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type headSnapshotTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *headSnapshotTestSuite) SetupSuite() { - s.idr = "simplewiki_namespace_0" - s.pth = fmt.Sprintf("snapshots/%s/download", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *headSnapshotTestSuite) TestHeadSnapshot() { - shs, err := s.clt.HeadSnapshot(s.ctx, s.idr) - - 
if s.sts != http.StatusOK { - s.Empty(shs) - s.Error(err) - } else { - s.NotEmpty(shs) - s.NotEmpty(shs.ContentLength) - s.NotEmpty(shs.ContentType) - s.NotEmpty(shs.ETag) - s.NotEmpty(shs.LastModified) - s.NoError(err) - } -} - -func TestHeadSnapshot(t *testing.T) { - for _, tcs := range []*headSnapshotTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - mtd: http.MethodHead, - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type downloadSnapshotTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *downloadSnapshotTestSuite) SetupSuite() { - s.idr = "simplewiki_namespace_0" - s.pth = fmt.Sprintf("snapshots/%s/download", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *downloadSnapshotTestSuite) TestDownloadSnapshot() { - tmf, err := os.CreateTemp("", "spt_tmp.tar.gz") - - if err != nil { - log.Fatal(err) - } - - defer tmf.Close() - err = s.clt.DownloadSnapshot(s.ctx, s.idr, tmf) - - if s.sts != http.StatusOK { - s.Error(err) - } else { - s.NoError(err) - } -} - -func TestDownloadSnapshot(t *testing.T) { - for _, tcs := range []*downloadSnapshotTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/simplewiki_namespace_0.tar.gz", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type readSnapshotTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *readSnapshotTestSuite) SetupSuite() { - s.idr = "simplewiki_namespace_0" - s.pth = fmt.Sprintf("snapshots/%s/download", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *readSnapshotTestSuite) TestReadSnapshot() { - nmc := 0 - err := s.clt.ReadSnapshot(s.ctx, s.idr, func(art *api.Article) error { - s.NotEmpty(art.Name) - s.NotEmpty(art.Identifier) - nmc++ - return nil - }) - - if 
s.sts != http.StatusOK { - s.Error(err) - s.Zero(nmc) - } else { - s.NoError(err) - s.NotZero(nmc) - } -} - -func TestReadSnapshot(t *testing.T) { - for _, tcs := range []*readSnapshotTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/simplewiki_namespace_0.tar.gz", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getArticlesTestSuite struct { - baseEntityTestSuite - nme string -} - -func (s *getArticlesTestSuite) SetupSuite() { - s.nme = "Squirrel" - s.pth = fmt.Sprintf("articles/%s", s.nme) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getArticlesTestSuite) TestGetArticles() { - ars, err := s.clt.GetArticles(s.ctx, s.nme, s.req) - - for _, art := range ars { - s.NotEmpty(art.Name) - s.NotEmpty(art.Identifier) - s.NotEmpty(art.Abstract) - s.NotEmpty(art.URL) - } - - if s.sts != http.StatusOK { - s.Empty(ars) - s.Error(err) - } else { - s.NotEmpty(ars) - s.NoError(err) - } -} - -func TestGetArticles(t *testing.T) { - for _, tcs := range []*getArticlesTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/articles.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getStructuredContentsTestSuite struct { - baseEntityTestSuite - nme string -} - -func (s *getStructuredContentsTestSuite) SetupSuite() { - s.nme = "Squirrel" - s.pth = fmt.Sprintf("structured-contents/%s", s.nme) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getStructuredContentsTestSuite) TestGetStructuredContents() { - scs, err := s.clt.GetStructuredContents(s.ctx, s.nme, s.req) - - for _, sct := range scs { - s.NotEmpty(sct.Name) - s.NotEmpty(sct.Identifier) - s.NotEmpty(sct.URL) - s.NotEmpty(sct.Abstract) - } - - if s.sts != http.StatusOK { - s.Empty(scs) - 
s.Error(err) - } else { - s.NotEmpty(scs) - s.NoError(err) - } -} - -func TestGetStructuredContents(t *testing.T) { - for _, tcs := range []*getStructuredContentsTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/structured-contents.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getStructuredSnapshotsTestSuite struct { - baseEntityTestSuite -} - -func (s *getStructuredSnapshotsTestSuite) SetupSuite() { - s.pth = "snapshots/structured-contents" - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getStructuredSnapshotsTestSuite) TestGetStructuredSnapshots() { - sps, err := s.clt.GetStructuredSnapshots(s.ctx, s.req) - - for _, spt := range sps { - s.NotEmpty(spt.Identifier) - s.NotEmpty(spt.Version) - s.NotEmpty(spt.DateModified) - s.NotEmpty(spt.IsPartOf) - s.NotEmpty(spt.InLanguage) - s.NotNil(spt.Namespace) - s.NotEmpty(spt.Size) - } - - if s.sts != http.StatusOK { - s.Empty(sps) - s.Error(err) - } else { - s.NotEmpty(sps) - s.NoError(err) - } -} - -func TestGetStructuredSnapshots(t *testing.T) { - for _, tcs := range []*getStructuredSnapshotsTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/snapshots.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type getStructuredSnapshotTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *getStructuredSnapshotTestSuite) SetupSuite() { - s.idr = "enwiki_namespace_0" - s.pth = fmt.Sprintf("snapshots/structured-contents/%s", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *getStructuredSnapshotTestSuite) TestGetStructuredSnapshot() { - spt, err := s.clt.GetStructuredSnapshot(s.ctx, s.idr, s.req) - - if s.sts != http.StatusOK { - s.Empty(spt.Identifier) - 
s.Empty(spt.Version) - s.Empty(spt.DateModified) - s.Empty(spt.IsPartOf) - s.Empty(spt.InLanguage) - s.Nil(spt.Namespace) - s.Empty(spt.Size) - s.Error(err) - } else { - s.NotEmpty(spt.Identifier) - s.NotEmpty(spt.Version) - s.NotEmpty(spt.DateModified) - s.NotEmpty(spt.IsPartOf) - s.NotEmpty(spt.InLanguage) - s.NotNil(spt.Namespace) - s.NotEmpty(spt.Size) - s.NoError(err) - } -} - -func TestGetStructuredSnapshot(t *testing.T) { - for _, tcs := range []*getStructuredSnapshotTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/snapshot.json", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type headStructuredSnapshotTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *headStructuredSnapshotTestSuite) SetupSuite() { - s.idr = "enwiki_namespace_0" - s.pth = fmt.Sprintf("snapshots/structured-contents/%s/download", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *headStructuredSnapshotTestSuite) TestHeadStructuredSnapshot() { - shs, err := s.clt.HeadStructuredSnapshot(s.ctx, s.idr) - - if s.sts != http.StatusOK { - s.Empty(shs) - s.Error(err) - } else { - s.NotEmpty(shs) - s.NotEmpty(shs.ContentLength) - s.NotEmpty(shs.ContentType) - s.NotEmpty(shs.ETag) - s.NotEmpty(shs.LastModified) - s.NoError(err) - } -} - -func TestHeadStructuredSnapshot(t *testing.T) { - for _, tcs := range []*headSnapshotTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - mtd: http.MethodHead, - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type downloadStructuredSnapshotTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *downloadStructuredSnapshotTestSuite) SetupSuite() { - s.idr = "enwiki_namespace_0" - s.pth = 
fmt.Sprintf("snapshots/structured-contents/%s/download", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *downloadStructuredSnapshotTestSuite) TestDownloadStructuredSnapshot() { - tmf, err := os.CreateTemp("", "spt_tmp.tar.gz") - - if err != nil { - log.Fatal(err) - } - - defer tmf.Close() - err = s.clt.DownloadStructuredSnapshot(s.ctx, s.idr, tmf) - - if s.sts != http.StatusOK { - s.Error(err) - } else { - s.NoError(err) - } -} - -func TestDownloadStructuredSnapshot(t *testing.T) { - for _, tcs := range []*downloadStructuredSnapshotTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/enwiki_namespace_0.tar.gz", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusNotFound, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type readStructuredSnapshotTestSuite struct { - baseEntityTestSuite - idr string -} - -func (s *readStructuredSnapshotTestSuite) SetupSuite() { - s.idr = "simplewiki_namespace_0" - s.pth = fmt.Sprintf("snapshots/structured-contents/%s/download", s.idr) - s.baseEntityTestSuite.SetupSuite() -} - -func (s *readSnapshotTestSuite) TestReadStructuredSnapshot() { - nmc := 0 - err := s.clt.ReadStructuredSnapshot(s.ctx, s.idr, func(art *api.StructuredContent) error { - s.NotEmpty(art.Name) - s.NotEmpty(art.Identifier) - nmc++ - return nil - }) - - if s.sts != http.StatusOK { - s.Error(err) - s.Zero(nmc) - } else { - s.NoError(err) - s.NotZero(nmc) - } -} - -func TestReadStructuredSnapshot(t *testing.T) { - for _, tcs := range []*readStructuredSnapshotTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/enwiki_namespace_0.tar.gz", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} - -type streamArticlesTestSuite struct { - baseEntityTestSuite -} - -func (s *streamArticlesTestSuite) SetupSuite() { - 
s.pth = "articles" - s.baseEntityTestSuite.SetupSuite() -} - -func (s *streamArticlesTestSuite) TestStreamArticles() { - nmc := 0 - err := s.clt.StreamArticles(s.ctx, s.req, func(art *api.Article) error { - s.NotEmpty(art.Name) - s.NotEmpty(art.Identifier) - nmc++ - return nil - }) - - if s.sts != http.StatusOK { - s.Error(err) - s.Zero(nmc) - } else { - s.NoError(err) - s.NotZero(nmc) - } -} - -func TestStreamArticles(t *testing.T) { - for _, tcs := range []*streamArticlesTestSuite{ - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusOK, - fph: "testdata/articles.ndjson", - }, - }, - { - baseEntityTestSuite: baseEntityTestSuite{ - sts: http.StatusUnauthorized, - fph: "testdata/error.json", - }, - }, - } { - suite.Run(t, tcs) - } -} diff --git a/pkg/api/api.go b/pkg/api/api.go index 631b906..d7f458c 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -209,6 +209,31 @@ type ChunksReader interface { ReadChunk(ctx context.Context, sid string, idr string, cbk ReadCallback) error } +// StructuredSnapshotsGetter is an interface for getting multiple structured content snapshots. +type StructuredSnapshotsGetter interface { + GetStructuredSnapshots(ctx context.Context, req *Request) ([]*StructuredContentSnapshot, error) +} + +// StructuredSnapshotGetter is an interface for getting a single structured content snapshot by ID. +type StructuredSnapshotGetter interface { + GetStructuredSnapshot(ctx context.Context, idr string, req *Request) (*StructuredContentSnapshot, error) +} + +// StructuredSnapshotHeader is an interface for getting the headers of a single structured content snapshot by ID. +type StructuredSnapshotHeader interface { + HeadStructuredSnapshot(ctx context.Context, idr string) (*Headers, error) +} + +// StructuredSnapshotReader is an interface for reading the contents of a single structured content snapshot by ID with a callback function. 
+type StructuredSnapshotReader interface { + ReadStructuredSnapshot(ctx context.Context, idr string, cbk ReadCallback) error +} + +// StructuredSnapshotDownloader is an interface for downloading a single structured content snapshot by ID to the given io.WriteSeeker. +type StructuredSnapshotDownloader interface { + DownloadStructuredSnapshot(ctx context.Context, idr string, wsk io.WriteSeeker) error +} + // API interface tha encapsulates the whole functionality of the client. // Can be used with composition in unit testing. type API interface { @@ -239,6 +264,11 @@ type API interface { ChunksReader ArticlesGetter StructuredContentsGetter + StructuredSnapshotsGetter + StructuredSnapshotGetter + StructuredSnapshotHeader + StructuredSnapshotReader + StructuredSnapshotDownloader ArticlesStreamer } @@ -742,6 +772,33 @@ func (c *Client) GetStructuredContents(ctx context.Context, nme string, req *Req return ats, c.getEntity(ctx, req, fmt.Sprintf("structured-contents/%s", nme), &ats) } +// GetStructuredSnapshots retrieves the list of all structured content snapshots, and returns an error if any. +func (c *Client) GetStructuredSnapshots(ctx context.Context, req *Request) ([]*StructuredContentSnapshot, error) { + sps := []*StructuredContentSnapshot{} + return sps, c.getEntity(ctx, req, "snapshots/structured-contents/", &sps) +} + +// GetStructuredSnapshot retrieves a single structured content snapshot for a specific ID, and returns an error if any. +func (c *Client) GetStructuredSnapshot(ctx context.Context, idr string, req *Request) (*StructuredContentSnapshot, error) { + snp := new(StructuredContentSnapshot) + return snp, c.getEntity(ctx, req, fmt.Sprintf("snapshots/structured-contents/%s", idr), snp) +} + +// HeadStructuredSnapshot retrieves only the headers of a single structured content snapshot for a specific ID, and returns an error if any.
+func (c *Client) HeadStructuredSnapshot(ctx context.Context, idr string) (*Headers, error) { + return c.headEntity(ctx, fmt.Sprintf("snapshots/structured-contents/%s/download", idr)) +} + +// ReadStructuredSnapshot reads the contents of a single structured content snapshot for a specific ID, and invokes the specified callback function for each chunk read. +func (c *Client) ReadStructuredSnapshot(ctx context.Context, idr string, cbk ReadCallback) error { + return c.readEntity(ctx, fmt.Sprintf("snapshots/structured-contents/%s/download", idr), cbk) +} + +// DownloadStructuredSnapshot downloads the contents of a single structured content snapshot for a specific ID, and writes the data to the specified WriteSeeker. +func (c *Client) DownloadStructuredSnapshot(ctx context.Context, idr string, wsk io.WriteSeeker) error { + return c.downloadEntity(ctx, fmt.Sprintf("snapshots/structured-contents/%s/download", idr), wsk) +} + // StreamArticles streams all available articles from the server and applies a callback function to each article // as they arrive. The callback function must implement the ReadCallback interface.
func (c *Client) StreamArticles(ctx context.Context, req *Request, cbk ReadCallback) error { diff --git a/pkg/api/api_test.go b/pkg/api/api_test.go index 4e71596..2c04f2f 100644 --- a/pkg/api/api_test.go +++ b/pkg/api/api_test.go @@ -1412,6 +1412,251 @@ func TestGetStructuredContents(t *testing.T) { } } +type GetStructuredSnapshotsTestSuite struct { + baseEntityTestSuite +} + +func (s *GetStructuredSnapshotsTestSuite) SetupSuite() { + s.pth = "snapshots/structured-contents/" + s.baseEntityTestSuite.SetupSuite() +} + +func (s *GetStructuredSnapshotsTestSuite) TestGetStructuredSnapshots() { + sps, err := s.clt.GetStructuredSnapshots(s.ctx, s.req) + + for _, spt := range sps { + s.NotEmpty(spt.Identifier) + s.NotEmpty(spt.Version) + s.NotEmpty(spt.DateModified) + s.NotEmpty(spt.IsPartOf) + s.NotEmpty(spt.InLanguage) + s.NotNil(spt.Namespace) + s.NotEmpty(spt.Size) + } + + if s.sts != http.StatusOK { + s.Empty(sps) + s.Error(err) + } else { + s.NotEmpty(sps) + s.NoError(err) + } +} + +func TestGetStructuredSnapshots(t *testing.T) { + for _, tcs := range []*GetStructuredSnapshotsTestSuite{ + { + baseEntityTestSuite: baseEntityTestSuite{ + sts: http.StatusOK, + fph: "testdata/structured-contents-snapshots.json", + }, + }, + { + baseEntityTestSuite: baseEntityTestSuite{ + sts: http.StatusNotFound, + fph: "testdata/error.json", + }, + }, + } { + suite.Run(t, tcs) + } +} + +type getStructuredSnapshotTestSuite struct { + baseEntityTestSuite + idr string +} + +func (s *getStructuredSnapshotTestSuite) SetupSuite() { + s.idr = "enwiki_namespace_0" + s.pth = fmt.Sprintf("snapshots/structured-contents/%s", s.idr) + s.baseEntityTestSuite.SetupSuite() +} + +func (s *getStructuredSnapshotTestSuite) TestGetStructuredSnapshot() { + spt, err := s.clt.GetStructuredSnapshot(s.ctx, s.idr, s.req) + + if s.sts != http.StatusOK { + s.Empty(spt.Identifier) + s.Empty(spt.Version) + s.Empty(spt.DateModified) + s.Empty(spt.IsPartOf) + s.Empty(spt.InLanguage) + s.Nil(spt.Namespace) + 
s.Empty(spt.Size) + s.Error(err) + } else { + s.NotEmpty(spt.Identifier) + s.NotEmpty(spt.Version) + s.NotEmpty(spt.DateModified) + s.NotEmpty(spt.IsPartOf) + s.NotEmpty(spt.InLanguage) + s.NotNil(spt.Namespace) + s.NotEmpty(spt.Size) + s.NoError(err) + } +} + +func TestGetStructuredSnapshot(t *testing.T) { + for _, tcs := range []*getStructuredSnapshotTestSuite{ + { + baseEntityTestSuite: baseEntityTestSuite{ + sts: http.StatusOK, + fph: "testdata/structured-contents-snapshot.json", + }, + }, + { + baseEntityTestSuite: baseEntityTestSuite{ + sts: http.StatusNotFound, + fph: "testdata/error.json", + }, + }, + } { + suite.Run(t, tcs) + } +} + +type headStructuredSnapshotTestSuite struct { + baseEntityTestSuite + idr string +} + +func (s *headStructuredSnapshotTestSuite) SetupSuite() { + s.idr = "enwiki_namespace_0" + s.pth = fmt.Sprintf("snapshots/structured-contents/%s/download", s.idr) + s.baseEntityTestSuite.SetupSuite() +} + +func (s *headStructuredSnapshotTestSuite) TestHeadStructuredSnapshot() { + shs, err := s.clt.HeadStructuredSnapshot(s.ctx, s.idr) + + if s.sts != http.StatusOK { + s.Empty(shs) + s.Error(err) + } else { + s.NotEmpty(shs) + s.NotEmpty(shs.ContentLength) + s.NotEmpty(shs.ContentType) + s.NotEmpty(shs.ETag) + s.NotEmpty(shs.LastModified) + s.NoError(err) + } +} + +func TestHeadStructuredSnapshot(t *testing.T) { + for _, tcs := range []*headStructuredSnapshotTestSuite{ + { + baseEntityTestSuite: baseEntityTestSuite{ + sts: http.StatusOK, + mtd: http.MethodHead, + }, + }, + { + baseEntityTestSuite: baseEntityTestSuite{ + sts: http.StatusNotFound, + fph: "testdata/error.json", + }, + }, + } { + suite.Run(t, tcs) + } +} + +type downloadStructuredSnapshotTestSuite struct { + baseEntityTestSuite + idr string +} + +func (s *downloadStructuredSnapshotTestSuite) SetupSuite() { + s.idr = "simplewiki_namespace_0" + s.pth = fmt.Sprintf("snapshots/structured-contents/%s/download", s.idr) + s.baseEntityTestSuite.SetupSuite() +} + +func (s 
*downloadStructuredSnapshotTestSuite) TestDownloadStructuredSnapshot() { + tmf, err := os.CreateTemp("", "spt_tmp.tar.gz") + + if err != nil { + log.Fatal(err) + } + + defer tmf.Close() + err = s.clt.DownloadStructuredSnapshot(s.ctx, s.idr, tmf) + + if s.sts != http.StatusOK { + s.Error(err) + } else { + s.NoError(err) + } +} + +func TestDownloadStructuredSnapshot(t *testing.T) { + for _, tcs := range []*downloadStructuredSnapshotTestSuite{ + { + baseEntityTestSuite: baseEntityTestSuite{ + sts: http.StatusOK, + fph: "testdata/simplewiki_namespace_0.tar.gz", + }, + }, + { + baseEntityTestSuite: baseEntityTestSuite{ + sts: http.StatusNotFound, + fph: "testdata/error.json", + }, + }, + } { + suite.Run(t, tcs) + } +} + +type readStructuredSnapshotTestSuite struct { + baseEntityTestSuite + idr string +} + +func (s *readStructuredSnapshotTestSuite) SetupSuite() { + s.idr = "simplewiki_namespace_0" + s.pth = fmt.Sprintf("snapshots/structured-contents/%s/download", s.idr) + s.baseEntityTestSuite.SetupSuite() +} + +func (s *readStructuredSnapshotTestSuite) TestReadStructuredSnapshot() { + nmc := 0 + err := s.clt.ReadStructuredSnapshot(s.ctx, s.idr, func(art *api.Article) error { + s.NotEmpty(art.Name) + s.NotEmpty(art.Identifier) + nmc++ + return nil + }) + + if s.sts != http.StatusOK { + s.Error(err) + s.Zero(nmc) + } else { + s.NoError(err) + s.NotZero(nmc) + } +} + +func TestReadStructuredSnapshot(t *testing.T) { + for _, tcs := range []*readStructuredSnapshotTestSuite{ + { + baseEntityTestSuite: baseEntityTestSuite{ + sts: http.StatusOK, + fph: "testdata/simplewiki_namespace_0.tar.gz", + }, + }, + { + baseEntityTestSuite: baseEntityTestSuite{ + sts: http.StatusUnauthorized, + fph: "testdata/error.json", + }, + }, + } { + suite.Run(t, tcs) + } +} + type streamArticlesTestSuite struct { baseEntityTestSuite } diff --git a/pkg/api/structuredcontent.go b/pkg/api/structuredcontent.go index 901c54a..2268892 100644 --- a/pkg/api/structuredcontent.go +++ 
b/pkg/api/structuredcontent.go @@ -50,6 +50,29 @@ type StructuredContent struct { Image *Image `json:"image,omitempty"` } +type StructuredContentSnapshot struct { + // Identifier is the unique identifier for the structured content snapshot. + Identifier string `json:"identifier,omitempty"` + + // Version of the structured content snapshot as an MD5 checksum. + Version string `json:"version,omitempty"` + + // DateModified date and time the structured content snapshot was last modified. + DateModified *time.Time `json:"date_modified,omitempty"` + + // IsPartOf the project that this structured content snapshot belongs to. + IsPartOf *Project `json:"is_part_of,omitempty"` + + // InLanguage the language of the contents of the structured content snapshot. + InLanguage *Language `json:"in_language,omitempty"` + + // Namespace of the structured content snapshot. + Namespace *Namespace `json:"namespace,omitempty"` + + // Size of the structured content snapshot. + Size *Size `json:"size,omitempty"` +} + // Part represents a part of a structured content (section, field etc.). type Part struct { // Name is the name of the part. 
diff --git a/pkg/api/testdata/structured-contents-snapshot.json b/pkg/api/testdata/structured-contents-snapshot.json new file mode 100644 index 0000000..6597913 --- /dev/null +++ b/pkg/api/testdata/structured-contents-snapshot.json @@ -0,0 +1,18 @@ +{ + "identifier": "enwiki_namespace_0", + "version": "aabf49b70dabd809272293e5a9f2d589", + "date_modified": "2024-10-12T00:34:58.163473679Z", + "is_part_of": { + "identifier": "enwiki" + }, + "in_language": { + "identifier": "en" + }, + "namespace": { + "identifier": 0 + }, + "size": { + "value": 11031.334e0, + "unit_text": "MB" + } +} \ No newline at end of file diff --git a/pkg/api/testdata/structured-contents-snapshots.json b/pkg/api/testdata/structured-contents-snapshots.json new file mode 100644 index 0000000..8ca2b45 --- /dev/null +++ b/pkg/api/testdata/structured-contents-snapshots.json @@ -0,0 +1,110 @@ +[ + { + "identifier": "dewiki_namespace_0", + "version": "fd7ac8d6b4d917d1f8d9185b14454705", + "date_modified": "2024-09-23T00:44:46.097407365Z", + "is_part_of": { + "identifier": "dewiki" + }, + "in_language": { + "identifier": "de" + }, + "namespace": { + "identifier": 0 + }, + "size": { + "value": 2567.015e0, + "unit_text": "MB" + } + }, + { + "identifier": "enwiki_namespace_0", + "version": "aabf49b70dabd809272293e5a9f2d589", + "date_modified": "2024-10-12T00:34:58.163473679Z", + "is_part_of": { + "identifier": "enwiki" + }, + "in_language": { + "identifier": "en" + }, + "namespace": { + "identifier": 0 + }, + "size": { + "value": 11031.334e0, + "unit_text": "MB" + } + }, + { + "identifier": "eswiki_namespace_0", + "version": "5672b255aa99661fb38d1e4288abe927", + "date_modified": "2024-09-23T00:50:47.983089521Z", + "is_part_of": { + "identifier": "eswiki" + }, + "in_language": { + "identifier": "es" + }, + "namespace": { + "identifier": 0 + }, + "size": { + "value": 1915.143e0, + "unit_text": "MB" + } + }, + { + "identifier": "frwiki_namespace_0", + "version": "0d09543c08e3f988dfc1868411870953", + 
"date_modified": "2024-09-23T01:08:31.268437146Z", + "is_part_of": { + "identifier": "frwiki" + }, + "in_language": { + "identifier": "fr" + }, + "namespace": { + "identifier": 0 + }, + "size": { + "value": 2953.023e0, + "unit_text": "MB" + } + }, + { + "identifier": "itwiki_namespace_0", + "version": "9e057ec37f5026cd8aec14e48898a9d8", + "date_modified": "2024-09-23T00:56:17.206621008Z", + "is_part_of": { + "identifier": "itwiki" + }, + "in_language": { + "identifier": "it" + }, + "namespace": { + "identifier": 0 + }, + "size": { + "value": 1638.477e0, + "unit_text": "MB" + } + }, + { + "identifier": "ptwiki_namespace_0", + "version": "7b6d4c080df6e3cd7ee42466bcde43f2", + "date_modified": "2024-10-10T00:01:05.482710521Z", + "is_part_of": { + "identifier": "ptwiki" + }, + "in_language": { + "identifier": "pt" + }, + "namespace": { + "identifier": 0 + }, + "size": { + "value": 43.391e0, + "unit_text": "MB" + } + } +] \ No newline at end of file