From a0ba2a7a9e9fe3293457ea03ef2b5e52ee65bb29 Mon Sep 17 00:00:00 2001 From: Ads Dawson <104169244+GangGreenTemperTatum@users.noreply.github.com> Date: Mon, 25 Nov 2024 13:52:04 -0500 Subject: [PATCH 1/5] feat: rigging pr decorator for robopage prs --- .github/scripts/rigging_pr_decorator.py | 145 +++++++++++++++++++ .github/workflows/rigging_pr_description.yml | 64 ++++++++ 2 files changed, 209 insertions(+) create mode 100644 .github/scripts/rigging_pr_decorator.py create mode 100644 .github/workflows/rigging_pr_description.yml diff --git a/.github/scripts/rigging_pr_decorator.py b/.github/scripts/rigging_pr_decorator.py new file mode 100644 index 0000000..fb41af0 --- /dev/null +++ b/.github/scripts/rigging_pr_decorator.py @@ -0,0 +1,145 @@ +import asyncio +import base64 +import os +import typing as t + +from pydantic import ConfigDict, StringConstraints + +import rigging as rg +from rigging import logger +from rigging.generator import GenerateParams, Generator, register_generator + +logger.enable("rigging") + +MAX_TOKENS = 8000 +TRUNCATION_WARNING = "\n\n**Note**: Due to the large size of this diff, some content has been truncated." 
+# String type that pydantic strips of leading/trailing whitespace on validation.
+str_strip = t.Annotated[str, StringConstraints(strip_whitespace=True)]
+
+
+class PRDiffData(rg.Model):
+    """XML model for PR diff data.
+
+    Holds the (possibly truncated) diff text in a single ``content`` element so
+    it can be rendered into the prompt with ``to_xml()``.
+    """
+
+    content: str_strip = rg.element()
+
+    @classmethod
+    def xml_example(cls) -> str:
+        """Return the sample text the prompt uses to show how the diff is delimited."""
+        # NOTE(review): the prompt says the diff sits "between ... tags", but this
+        # literal contains no tag markup -- confirm it matches the committed file.
+        return """example diff content"""
+
+
+class PRDecorator(Generator):
+    """Generator for creating PR descriptions.
+
+    Registered below under the ``pr_decorator`` name. It requires an API key in
+    ``params.extra`` and hands every message sequence to the generator that
+    ``rg.get_generator`` resolves for ``self.model``.
+    """
+
+    # Pydantic settings: allow arbitrary attribute types, validate on assignment.
+    model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True)
+
+    # Copied from params.extra["api_key"] in __init__.
+    api_key: str = ""
+    # Set from params.max_tokens in __init__; MAX_TOKENS when that is unset or 0.
+    max_tokens: int = MAX_TOKENS
+
+    def __init__(self, model: str, params: rg.GenerateParams) -> None:
+        """Validate the API key and initialise the generator.
+
+        Args:
+            model: Model identifier passed on to the base ``Generator``.
+            params: Generation parameters; ``params.extra["api_key"]`` must be set.
+
+        Raises:
+            ValueError: If ``params.extra`` has no non-empty ``api_key``.
+        """
+        api_key = params.extra.get("api_key")
+        if not api_key:
+            raise ValueError("api_key is required in params.extra")
+
+        super().__init__(model=model, params=params, api_key=api_key)
+        self.api_key = api_key
+        self.max_tokens = params.max_tokens or MAX_TOKENS
+
+    async def generate_messages(
+        self,
+        messages: t.Sequence[t.Sequence[rg.Message]],
+        params: t.Sequence[GenerateParams],
+    ) -> t.Sequence[rg.GeneratedMessage]:
+        """Generate responses for each message sequence.
+
+        Each ``(messages[i], params[i])`` pair is sent, one at a time, to the
+        generator resolved for ``self.model``; the results are concatenated in
+        input order.
+
+        Args:
+            messages: One message sequence per requested generation.
+            params: Generation parameters, paired with ``messages`` by position.
+
+        Returns:
+            The generated messages collected from every call.
+        """
+        responses = []
+        # zip() stops at the shorter input, so unmatched trailing items are ignored.
+        for message_seq, p in zip(messages, params):
+            base_generator = rg.get_generator(self.model, params=p)
+            llm_response = await base_generator.generate_messages([message_seq], [p])
+            responses.extend(llm_response)
+        return responses
+
+
+# Make PRDecorator available to rg.get_generator() under the "pr_decorator" name.
+register_generator("pr_decorator", PRDecorator)
+
+
+async def generate_pr_description(diff_text: str) -> str:
+    """Generate a PR description from the diff text.
+
+    The diff is cut to roughly MAX_TOKENS tokens (estimated at four characters
+    per token) with TRUNCATION_WARNING appended, wrapped in PRDiffData, and
+    sent with the prompt to the "pr_decorator!gpt-4-turbo-preview" generator.
+
+    Args:
+        diff_text: Diff text to summarise.
+
+    Returns:
+        The stripped content of the last chat message.
+
+    Raises:
+        KeyError: If OPENAI_API_KEY is not set in the environment.
+    """
+    # Rough token estimate: about four characters per token.
+    diff_tokens = len(diff_text) // 4
+    if diff_tokens >= MAX_TOKENS:
+        # Reserve room for the warning so the result stays within MAX_TOKENS * 4 characters.
+        char_limit = (MAX_TOKENS * 4) - len(TRUNCATION_WARNING)
+        diff_text = diff_text[:char_limit] + TRUNCATION_WARNING
+
+    diff_data = PRDiffData(content=diff_text)
+    # NOTE(review): "diff_text" is also stored in extra, but the PRDecorator code
+    # shown here only reads "api_key" -- confirm whether anything consumes it.
+    params = rg.GenerateParams(
+        extra={
+            "api_key": os.environ["OPENAI_API_KEY"],
+            "diff_text": diff_text,
+        },
+        temperature=0.7,
+        max_tokens=500,
+    )
+
+    generator = rg.get_generator("pr_decorator!gpt-4-turbo-preview", params=params)
+    prompt = f"""You are a helpful AI that generates clear and concise PR descriptions.
+ Analyze the provided diff between {PRDiffData.xml_example()} tags and create a summary using exactly this format: + + ### PR Summary + + #### Overview of Changes + + + #### Key Modifications + 1. ****: + 2. ****: + 3. ****: + (continue as needed) + + #### Potential Impact + - + - + - + (continue as needed) + + Here is the PR diff to analyze: + {diff_data.to_xml()}""" + + chat = await generator.chat(prompt).run() + return chat.last.content.strip() + + +async def main(): + """Main function for CI environment""" + if not os.environ.get("OPENAI_API_KEY"): + raise ValueError("OPENAI_API_KEY environment variable must be set") + + try: + diff_text = os.environ.get("GIT_DIFF", "") + if not diff_text: + raise ValueError("No diff found in GIT_DIFF environment variable") + + try: + diff_text = base64.b64decode(diff_text).decode("utf-8") + except Exception: + padding = 4 - (len(diff_text) % 4) + if padding != 4: + diff_text += "=" * padding + diff_text = base64.b64decode(diff_text).decode("utf-8") + + logger.debug(f"Processing diff of length: {len(diff_text)}") + description = await generate_pr_description(diff_text) + + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + f.write("content<> $GITHUB_OUTPUT + + - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b #v5.0.3 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip cache purge + pip install rigging[all] + + # Generate the description using the diff + - name: Generate PR Description + id: description + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PR_NUMBER: ${{ github.event.pull_request.number }} + GIT_DIFF: ${{ steps.diff.outputs.diff }} + run: | + python .github/scripts/rigging_pr_decorator.py + + # Update the PR description + - name: Update PR Description + uses: nefrob/pr-description@4dcc9f3ad5ec06b2a197c5f8f93db5e69d2fdca7 #v1.2.0 + with: + content: | + ## AI-Generated Summary + + 
${{ steps.description.outputs.content }} + + --- + + This summary was generated with ❤️ by [rigging](https://rigging.dreadnode.io/) + regex: ".*" + regexFlags: s + token: ${{ secrets.GITHUB_TOKEN }} From f4d9b8abe408ebc9c6565a33f965195db6c175ee Mon Sep 17 00:00:00 2001 From: Ads Dawson <104169244+GangGreenTemperTatum@users.noreply.github.com> Date: Mon, 25 Nov 2024 13:56:53 -0500 Subject: [PATCH 2/5] fix: pin pydantic --- .github/workflows/rigging_pr_description.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rigging_pr_description.yml b/.github/workflows/rigging_pr_description.yml index 779a58d..b86a94c 100644 --- a/.github/workflows/rigging_pr_description.yml +++ b/.github/workflows/rigging_pr_description.yml @@ -34,6 +34,7 @@ jobs: run: | python -m pip install --upgrade pip pip cache purge + pip install pydantic==2.9.1 pip install rigging[all] # Generate the description using the diff From f4874ffe88a53156b43c6cb06a371ec098b2c932 Mon Sep 17 00:00:00 2001 From: Ads Dawson <104169244+GangGreenTemperTatum@users.noreply.github.com> Date: Mon, 25 Nov 2024 14:25:33 -0500 Subject: [PATCH 3/5] chore: add pr template --- .github/PULL_REQUEST_TEMPLATE.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..6e926f3 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,5 @@ +# robopages + +## Welcome + +Hello and welcome to robopages, thanks for contributing! You can leave the PR description blank and let [rigging](https://github.com/dreadnode/rigging) perform some magic here. 
\ No newline at end of file
From 9736b1fea162213fa269d7214368f85887b28067 Mon Sep 17 00:00:00 2001
From: Ads Dawson <104169244+GangGreenTemperTatum@users.noreply.github.com>
Date: Mon, 25 Nov 2024 17:06:58 -0500
Subject: [PATCH 4/5] feat: squealer example cicd go package

---
Reviewer notes (text between the three-dash line and the diff is not part of
the commit):

 * squealer.Dockerfile is a three-stage build. The "source" stage clones
   owenrumney/squealer on alpine, the "builder" stage vendors the modules and
   builds ./cmd/squealer on golang:1.21-alpine, and the final stage copies the
   binary into gcr.io/distroless/static-debian12:nonroot and runs it as
   nonroot.
 * alpine:latest is a floating tag and the clone names no branch, tag or
   commit, so a rebuild picks up whatever upstream publishes at that moment.
 * The "ls -la" in the clone stage is labelled as debug output in the file
   itself; the one in the build stage looks like the same kind of diagnostic.
 * squealer.yml runs the image "squealer:latest", but nothing in this patch
   builds or tags that image from squealer.Dockerfile. Confirm how the image
   is expected to be produced.
 * squealer.Dockerfile is added without a trailing newline.

 cybersecurity/cicd/squealer.Dockerfile | 21 ++++++++++++++++
 cybersecurity/cicd/squealer.yml        | 33 ++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 cybersecurity/cicd/squealer.Dockerfile
 create mode 100644 cybersecurity/cicd/squealer.yml

diff --git a/cybersecurity/cicd/squealer.Dockerfile b/cybersecurity/cicd/squealer.Dockerfile
new file mode 100644
index 0000000..c29f378
--- /dev/null
+++ b/cybersecurity/cicd/squealer.Dockerfile
@@ -0,0 +1,21 @@
+# Git clone stage
+FROM alpine:latest AS source
+RUN apk add --no-cache git
+WORKDIR /src
+RUN git clone https://github.com/owenrumney/squealer.git . && \
+    ls -la # Debug: verify files
+
+# Build stage
+FROM golang:1.21-alpine AS builder
+WORKDIR /build
+COPY --from=source /src/ ./
+RUN ls -la && \
+    go mod vendor && \
+    go build -mod=vendor -ldflags="-w -s" -o squealer ./cmd/squealer
+
+# Final stage
+FROM gcr.io/distroless/static-debian12:nonroot
+WORKDIR /app
+COPY --from=builder /build/squealer /app/
+USER nonroot:nonroot
+ENTRYPOINT ["/app/squealer"]
\ No newline at end of file
diff --git a/cybersecurity/cicd/squealer.yml b/cybersecurity/cicd/squealer.yml
new file mode 100644
index 0000000..caa748d
--- /dev/null
+++ b/cybersecurity/cicd/squealer.yml
@@ -0,0 +1,33 @@
+description: Squealer is a tool that finds secrets like keys, tokens and passwords in your code. It scans remote Git repositories for potential credential leaks.
+ +functions: + squealer_scan_git_repo: + description: Scan a remote git repository for secrets and credentials + parameters: + repository: + type: string + description: Git repository URL (SSH format) + examples: + - "git@github.com:username/repo.git" + - "git@github.com:owenrumney/woopsie.git" + container: + force: true + image: squealer:latest + cmdline: + - ${repository} + + squealer_scan_everything_git_repo: + description: Scan a remote git repository and history for secrets and credentials with everything flag + parameters: + repository: + type: string + description: Git repository URL (SSH format) + examples: + - "git@github.com:username/repo.git" + - "git@github.com:owenrumney/woopsie.git" + container: + force: true + image: squealer:latest + cmdline: + - ${repository} + - --everything From 333db9e654369014ad9ff29860dc7b2d13dd088b Mon Sep 17 00:00:00 2001 From: Ads Dawson <104169244+GangGreenTemperTatum@users.noreply.github.com> Date: Mon, 25 Nov 2024 17:17:10 -0500 Subject: [PATCH 5/5] fix: add categories while we wait for other prs to fix workflow --- cybersecurity/cicd/squealer.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cybersecurity/cicd/squealer.yml b/cybersecurity/cicd/squealer.yml index caa748d..61247ea 100644 --- a/cybersecurity/cicd/squealer.yml +++ b/cybersecurity/cicd/squealer.yml @@ -1,5 +1,9 @@ description: Squealer is a tool that finds secrets like keys, tokens and passwords in your code. It scans remote Git repositories for potential credential leaks. +categories: + - cybersecurity + - cicd + functions: squealer_scan_git_repo: description: Scan a remote git repository for secrets and credentials