Skip to content

Commit

Permalink
add zoekt-mirror-gitea (#844)
Browse files Browse the repository at this point in the history
* add zoekt-mirror-gitea

* * Clean up setting the default
* update note about topic filtering not being implemented as topics are missing from the API
* cleanup some pointers

* cleanup some code syntax
  • Loading branch information
techknowlogick authored Oct 14, 2024
1 parent 5687809 commit da3626e
Show file tree
Hide file tree
Showing 3 changed files with 308 additions and 0 deletions.
292 changes: 292 additions & 0 deletions cmd/zoekt-mirror-gitea/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This binary fetches all repos of a user or organization and clones
// them. It is strongly recommended to get a personal API token from
// https://gitea.com/user/settings/applications, save the token in a
// file, and point the --token option to it.
package main

import (
"flag"
"fmt"
"log"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"

"code.gitea.io/sdk/gitea"

"github.com/sourcegraph/zoekt/gitindex"
)

// topicsFlag is a flag.Value that collects every occurrence of a repeatable
// string flag, in the order given on the command line.
type topicsFlag []string

// String renders the collected values as one comma-separated string.
func (f *topicsFlag) String() string {
	collected := []string(*f)
	return strings.Join(collected, ",")
}

// Set records one more value; it never fails.
func (f *topicsFlag) Set(value string) error {
	extended := append(*f, value)
	*f = extended
	return nil
}

// reposFilters bundles the filters that getOrgRepos and getUserRepos apply to
// every page of search results.
type reposFilters struct {
// noArchived points at the value of the -no_archived flag. When it is true,
// archived repositories are dropped. It is dereferenced without a nil check,
// so it must always be set.
noArchived *bool
}

// main parses the command-line flags, lists the repositories of the requested
// Gitea organization or user, drops forks and repositories rejected by the
// name filters, clones what is left below --dest and, with --delete, removes
// stale local clones.
func main() {
	dest := flag.String("dest", "", "destination directory")
	giteaURL := flag.String("url", "https://gitea.com/", "Gitea url. If not set gitea.com will be used as the host.")
	org := flag.String("org", "", "organization to mirror")
	user := flag.String("user", "", "user to mirror")
	token := flag.String("token",
		filepath.Join(os.Getenv("HOME"), ".gitea-token"),
		"file holding API token.")
	forks := flag.Bool("forks", false, "also mirror forks.")
	deleteRepos := flag.Bool("delete", false, "delete missing repos")
	namePattern := flag.String("name", "", "only clone repos whose name matches the given regexp.")
	excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
	// The topic flags are accepted on the command line, but nothing in this
	// function reads them afterwards: topic filtering is not applied.
	topics := topicsFlag{}
	flag.Var(&topics, "topic", "only clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
	excludeTopics := topicsFlag{}
	flag.Var(&excludeTopics, "exclude_topic", "don't clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
	noArchived := flag.Bool("no_archived", false, "mirror only projects that are not archived")

	flag.Parse()

	if *dest == "" {
		log.Fatal("must set --dest")
	}

	parsedURL, err := url.Parse(*giteaURL)
	if err != nil {
		log.Fatal(err)
	}

	// --url defaults to https://gitea.com/, so testing it for the empty
	// string alone never triggers; look at the host that is actually used.
	if (*giteaURL == "" || parsedURL.Hostname() == "gitea.com") && *org == "" && *user == "" {
		log.Fatal("must set either --org or --user when gitea.com is used as host")
	}

	// Clones are placed in a per-host directory below --dest.
	// deleteStaleRepos is handed --dest itself and records the repositories
	// to keep as "<host>/<path>.git", so the host must be part of the clone
	// path for a fresh clone to be recognised as one to keep.
	destDir := filepath.Join(*dest, parsedURL.Host)
	if err := os.MkdirAll(destDir, 0o755); err != nil {
		log.Fatal(err)
	}

	var clientOptions []gitea.ClientOption
	if *token != "" {
		content, err := os.ReadFile(*token)
		if err != nil {
			log.Fatal(err)
		}
		// Token files normally end in a newline; strip surrounding
		// whitespace so it does not become part of the credential.
		clientOptions = append(clientOptions, gitea.SetToken(strings.TrimSpace(string(content))))
	}
	client, err := gitea.NewClient(*giteaURL, clientOptions...)
	if err != nil {
		log.Fatal(err)
	}

	filters := reposFilters{
		noArchived: noArchived,
	}
	var repos []*gitea.Repository
	switch {
	case *org != "":
		log.Printf("fetch repos for org: %s", *org)
		repos, err = getOrgRepos(client, *org, filters)
	case *user != "":
		log.Printf("fetch repos for user: %s", *user)
		repos, err = getUserRepos(client, *user, filters)
	default:
		log.Printf("no user or org specified, cloning all repos.")
		repos, err = getUserRepos(client, "", filters)
	}

	if err != nil {
		log.Fatal(err)
	}

	if !*forks {
		trimmed := []*gitea.Repository{}
		for _, r := range repos {
			if r.Fork {
				continue
			}
			trimmed = append(trimmed, r)
		}
		repos = trimmed
	}

	filter, err := gitindex.NewFilter(*namePattern, *excludePattern)
	if err != nil {
		log.Fatal(err)
	}

	{
		trimmed := []*gitea.Repository{}
		for _, r := range repos {
			if !filter.Include(r.Name) {
				// Log the name of every repository the name filter rejects.
				log.Println(r.Name)
				continue
			}
			trimmed = append(trimmed, r)
		}
		repos = trimmed
	}

	if err := cloneRepos(destDir, repos); err != nil {
		log.Fatalf("cloneRepos: %v", err)
	}

	if *deleteRepos {
		// At most one of --org/--user is normally set, so the
		// concatenation yields the owner name.
		if err := deleteStaleRepos(*dest, filter, repos, *org+*user); err != nil {
			log.Fatalf("deleteStaleRepos: %v", err)
		}
	}
}

// deleteStaleRepos asks gitindex.DeleteRepos to remove local clones below
// destDir that are no longer part of repos.
//
// The URL prefix handed to DeleteRepos is the HTML URL of the first
// repository with its path replaced by user. The set of repositories to keep
// is keyed by "<host>/<path>.git", derived from each repository's HTML URL.
// When repos is empty there is no URL to derive the prefix from, so nothing
// is deleted and nil is returned.
//
// Failures are returned to the caller instead of terminating the process
// here.
func deleteStaleRepos(destDir string, filter *gitindex.Filter, repos []*gitea.Repository, user string) error {
	if len(repos) == 0 {
		return nil
	}

	prefix, err := url.Parse(repos[0].HTMLURL)
	if err != nil {
		return err
	}
	prefix.Path = user

	names := make(map[string]struct{}, len(repos))
	for _, r := range repos {
		repoURL, err := url.Parse(r.HTMLURL)
		if err != nil {
			return err
		}
		names[filepath.Join(repoURL.Host, repoURL.Path+".git")] = struct{}{}
	}

	if err := gitindex.DeleteRepos(destDir, prefix, names, filter); err != nil {
		return fmt.Errorf("deleteRepos: %w", err)
	}
	return nil
}

// filterRepositories returns the entries of repos in their original order,
// leaving out archived repositories when noArchived is true. The result is
// nil when nothing is kept.
func filterRepositories(repos []*gitea.Repository, noArchived bool) (filteredRepos []*gitea.Repository) {
	for i := range repos {
		if candidate := repos[i]; !noArchived || !candidate.Archived {
			filteredRepos = append(filteredRepos, candidate)
		}
	}
	return filteredRepos
}

// getOrgRepos looks up the organization org and pages through the repository
// search restricted to that owner, applying reposFilters to every page.
// Paging stops at the first empty page or when the response reports no next
// page.
func getOrgRepos(client *gitea.Client, org string, reposFilters reposFilters) ([]*gitea.Repository, error) {
	owner, _, err := client.GetOrg(org)
	if err != nil {
		return nil, err
	}

	// Restrict the search to repositories owned by the organization.
	opts := gitea.SearchRepoOptions{OwnerID: owner.ID}

	var collected []*gitea.Repository
	for {
		page, resp, err := client.SearchRepos(opts)
		if err != nil {
			return nil, err
		}
		if len(page) == 0 {
			return collected, nil
		}

		collected = append(collected, filterRepositories(page, *reposFilters.noArchived)...)
		if resp.NextPage == 0 {
			return collected, nil
		}
		opts.Page = resp.NextPage
	}
}

// getUserRepos looks up the account named user and pages through the
// repository search restricted to that owner, applying reposFilters to every
// page. Paging stops at the first empty page or when the response reports no
// next page.
func getUserRepos(client *gitea.Client, user string, reposFilters reposFilters) ([]*gitea.Repository, error) {
	account, _, err := client.GetUserInfo(user)
	if err != nil {
		return nil, err
	}

	var (
		result []*gitea.Repository
		query  = gitea.SearchRepoOptions{OwnerID: account.ID}
	)
	for more := true; more; {
		batch, resp, err := client.SearchRepos(query)
		if err != nil {
			return nil, err
		}
		if len(batch) == 0 {
			break
		}
		result = append(result, filterRepositories(batch, *reposFilters.noArchived)...)
		query.Page = resp.NextPage
		more = resp.NextPage != 0
	}
	return result, nil
}

// cloneRepos clones every repository in repos below destDir via
// gitindex.CloneRepo, passing along a set of zoekt.* configuration values
// derived from the Gitea metadata. It stops at the first failure and returns
// that error. Each non-empty destination path reported by CloneRepo is
// printed on stdout.
func cloneRepos(destDir string, repos []*gitea.Repository) error {
	for _, r := range repos {
		host, err := url.Parse(r.HTMLURL)
		if err != nil {
			return err
		}
		log.Printf("cloning %s", r.HTMLURL)

		config := map[string]string{
			"zoekt.web-url-type": "gitea",
			"zoekt.web-url":      r.HTMLURL,
			"zoekt.name":         filepath.Join(host.Hostname(), r.FullName),

			"zoekt.gitea-stars":       strconv.Itoa(r.Stars),
			"zoekt.gitea-watchers":    strconv.Itoa(r.Watchers),
			"zoekt.gitea-subscribers": strconv.Itoa(r.Watchers), // FIXME: Get repo subscribers from API
			"zoekt.gitea-forks":       strconv.Itoa(r.Forks),

			"zoekt.archived": marshalBool(r.Archived),
			"zoekt.fork":     marshalBool(r.Fork),
			// A repository is public only when it is neither private nor
			// internal; internal repositories count as private.
			"zoekt.public": marshalBool(!(r.Private || r.Internal)),
		}
		dest, err := gitindex.CloneRepo(destDir, r.FullName, r.CloneURL, config)
		if err != nil {
			return err
		}
		if dest != "" {
			fmt.Println(dest)
		}
	}

	return nil
}

// marshalBool encodes a boolean as "1" (true) or "0" (false).
func marshalBool(b bool) string {
	switch b {
	case true:
		return "1"
	default:
		return "0"
	}
}
7 changes: 7 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,18 @@ require (
google.golang.org/protobuf v1.34.2
)

require (
github.com/davidmz/go-pageant v1.0.2 // indirect
github.com/go-fed/httpsig v1.1.0 // indirect
github.com/hashicorp/go-version v1.6.0 // indirect
)

require (
cloud.google.com/go v0.115.1 // indirect
cloud.google.com/go/auth v0.9.3 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect
cloud.google.com/go/compute/metadata v0.5.0 // indirect
code.gitea.io/sdk/gitea v0.19.0
dario.cat/mergo v1.0.1 // indirect
github.com/HdrHistogram/hdrhistogram-go v1.1.2 // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
Expand Down
9 changes: 9 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ cloud.google.com/go/profiler v0.4.1 h1:Q7+lOvikTGMJ/IAWocpYYGit4SIIoILmVZfEEWTOR
cloud.google.com/go/profiler v0.4.1/go.mod h1:LBrtEX6nbvhv1w/e5CPZmX9ajGG9BGLtGbv56Tg4SHs=
cloud.google.com/go/storage v1.43.0 h1:CcxnSohZwizt4LCzQHWvBf1/kvtHUn7gk9QERXPyXFs=
cloud.google.com/go/storage v1.43.0/go.mod h1:ajvxEa7WmZS1PxvKRq4bq0tFT3vMd502JwstCcYv0Q0=
code.gitea.io/sdk/gitea v0.19.0 h1:8I6s1s4RHgzxiPHhOQdgim1RWIRcr0LVMbHBjBFXq4Y=
code.gitea.io/sdk/gitea v0.19.0/go.mod h1:IG9xZJoltDNeDSW0qiF2Vqx5orMWa7OhVWrjvrd5NpI=
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
Expand Down Expand Up @@ -73,6 +75,8 @@ github.com/cyphar/filepath-securejoin v0.3.1/go.mod h1:F7i41x/9cBF7lzCrVsYs9fuzw
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davidmz/go-pageant v1.0.2 h1:bPblRCh5jGU+Uptpz6LgMZGD5hJoOt7otgT454WvHn0=
github.com/davidmz/go-pageant v1.0.2/go.mod h1:P2EDDnMqIwG5Rrp05dTRITj9z2zpGcD9efWSkTNKLIE=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elazarl/goproxy v0.0.0-20230808193330-2592e75ae04a h1:mATvB/9r/3gvcejNsXKSkQ6lcIaNec2nyfOdlTBR2lU=
Expand Down Expand Up @@ -108,6 +112,8 @@ github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCc
github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA=
github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og=
github.com/go-fed/httpsig v1.1.0 h1:9M+hb0jkEICD8/cAiNqEB66R87tTINszBRTjwjQzWcI=
github.com/go-fed/httpsig v1.1.0/go.mod h1:RCMrTZvN1bJYtofsG4rd5NaO5obxQ5xBkdiS7xsT7bM=
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI=
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic=
github.com/go-git/go-billy/v5 v5.5.0 h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+mTU=
Expand Down Expand Up @@ -199,6 +205,8 @@ github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB1
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek=
github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hexops/autogold v0.8.1/go.mod h1:97HLDXyG23akzAoRYJh/2OBs3kd80eHyKPvZw0S5ZBY=
github.com/hexops/autogold v1.3.1 h1:YgxF9OHWbEIUjhDbpnLhgVsjUDsiHDTyDfy2lrfdlzo=
github.com/hexops/autogold v1.3.1/go.mod h1:sQO+mQUCVfxOKPht+ipDSkJ2SCJ7BNJVHZexsXqWMx4=
Expand Down Expand Up @@ -375,6 +383,7 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.3.1-0.20221117191849-2c476679df9a/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
Expand Down

0 comments on commit da3626e

Please sign in to comment.