Skip to content

Commit

Permalink
Merge branch 'main' into ads/eng-260-feature-dockerize-ffufai-and-rob…
Browse files Browse the repository at this point in the history
…opages-example
  • Loading branch information
GangGreenTemperTatum authored Dec 5, 2024
2 parents 945bf99 + 5965ed1 commit a8196b1
Show file tree
Hide file tree
Showing 8 changed files with 403 additions and 10 deletions.
5 changes: 5 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# robopages

## Welcome

Hello and welcome to robopages, thanks for contributing! You can leave the PR description blank and let [rigging](https://github.com/dreadnode/rigging) perform some magic here.
145 changes: 145 additions & 0 deletions .github/scripts/rigging_pr_decorator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import asyncio
import base64
import os
import typing as t

from pydantic import ConfigDict, StringConstraints

import rigging as rg
from rigging import logger
from rigging.generator import GenerateParams, Generator, register_generator

# Enable the "rigging" logger so the library's log records are emitted
# during the CI run.
logger.enable("rigging")

# Approximate token budget for the diff that is sent to the model. The size
# check in generate_pr_description() estimates four characters per token.
# Also the fallback value for PRDecorator.max_tokens.
MAX_TOKENS = 8000
# Appended to the diff text whenever it has been cut down to fit the budget.
TRUNCATION_WARNING = "\n\n**Note**: Due to the large size of this diff, some content has been truncated."
# String type annotated with a pydantic constraint that strips surrounding
# whitespace.
str_strip = t.Annotated[str, StringConstraints(strip_whitespace=True)]


class PRDiffData(rg.Model):
    """Rigging XML model that wraps the text of a pull-request diff."""

    # The diff text itself; ``str_strip`` removes surrounding whitespace.
    content: str_strip = rg.element()

    @classmethod
    def xml_example(cls) -> str:
        """Return the sample XML snippet that is quoted inside the prompt."""
        # NOTE(review): the sample uses a <diff> root element; confirm that it
        # matches the tag rigging actually emits for this class in to_xml().
        example = "<diff><content>example diff content</content></diff>"
        return example


class PRDecorator(Generator):
    """Rigging generator used to produce PR descriptions.

    The class itself performs no generation: every conversation it receives
    is handed to the generator that ``rg.get_generator`` resolves for
    ``self.model``, and the replies are collected in order.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True)

    # Credential taken from ``params.extra["api_key"]`` at construction time.
    api_key: str = ""
    # Completion budget; falls back to MAX_TOKENS when params leave it unset.
    max_tokens: int = MAX_TOKENS

    def __init__(self, model: str, params: rg.GenerateParams) -> None:
        """Build the generator, requiring an API key in ``params.extra``.

        Raises:
            ValueError: if ``params.extra`` has no truthy ``"api_key"`` entry.
        """
        key = params.extra.get("api_key")
        if not key:
            raise ValueError("api_key is required in params.extra")

        super().__init__(model=model, params=params, api_key=key)
        self.api_key = key
        self.max_tokens = params.max_tokens if params.max_tokens else MAX_TOKENS

    async def generate_messages(
        self,
        messages: t.Sequence[t.Sequence[rg.Message]],
        params: t.Sequence[GenerateParams],
    ) -> t.Sequence[rg.GeneratedMessage]:
        """Generate one reply per conversation by delegating to the base model.

        ``messages`` and ``params`` are walked pairwise; each pair is sent, one
        at a time, to a generator obtained for ``self.model``.
        """
        collected = []
        for conversation, conversation_params in zip(messages, params):
            delegate = rg.get_generator(self.model, params=conversation_params)
            collected += await delegate.generate_messages(
                [conversation], [conversation_params]
            )
        return collected


# Register PRDecorator under the "pr_decorator" name; generate_pr_description()
# requests it with the identifier "pr_decorator!gpt-4-turbo-preview".
register_generator("pr_decorator", PRDecorator)


async def generate_pr_description(diff_text: str) -> str:
    """Ask the model for a structured PR summary of ``diff_text``.

    The diff is shortened (and TRUNCATION_WARNING appended) when it reaches
    the MAX_TOKENS budget, estimated at four characters per token. The return
    value is the content of the last chat message, stripped of surrounding
    whitespace.
    """
    char_budget = MAX_TOKENS * 4
    if len(diff_text) >= char_budget:
        # Leave room for the warning so the result stays within the budget.
        keep = char_budget - len(TRUNCATION_WARNING)
        diff_text = f"{diff_text[:keep]}{TRUNCATION_WARNING}"

    diff_data = PRDiffData(content=diff_text)
    extra_options = {
        "api_key": os.environ["OPENAI_API_KEY"],
        "diff_text": diff_text,
    }
    params = rg.GenerateParams(extra=extra_options, temperature=0.7, max_tokens=500)

    generator = rg.get_generator("pr_decorator!gpt-4-turbo-preview", params=params)
    prompt = f"""You are a helpful AI that generates clear and concise PR descriptions.
Analyze the provided diff between {PRDiffData.xml_example()} tags and create a summary using exactly this format:
### PR Summary
#### Overview of Changes
<overview paragraph>
#### Key Modifications
1. **<modification title>**: <description>
2. **<modification title>**: <description>
3. **<modification title>**: <description>
(continue as needed)
#### Potential Impact
- <impact point 1>
- <impact point 2>
- <impact point 3>
(continue as needed)
Here is the PR diff to analyze:
{diff_data.to_xml()}"""

    pipeline = generator.chat(prompt)
    chat = await pipeline.run()
    reply = chat.last.content
    return reply.strip()


def _decode_diff(encoded: str) -> str:
    """Decode the base64 diff payload, restoring any stripped ``=`` padding."""
    import binascii  # function-local so this block brings its own dependency

    try:
        raw = base64.b64decode(encoded)
    except binascii.Error:
        # The payload may arrive without its trailing "=" characters; pad it
        # to a multiple of four and retry. A second failure propagates.
        raw = base64.b64decode(encoded + "=" * (-len(encoded) % 4))
    # A diff can touch files that are not valid UTF-8. Substitute the
    # undecodable bytes instead of aborting the whole run.
    return raw.decode("utf-8", errors="replace")


def _write_multiline_output(handle: t.TextIO, name: str, value: str) -> None:
    """Append one multiline step output to an open GITHUB_OUTPUT handle."""
    import secrets  # function-local so this block brings its own dependency

    # A fixed delimiter such as "EOF" can appear on a line of its own in
    # model-generated text, ending the value early and letting the following
    # lines be parsed as additional outputs. A random delimiter cannot be
    # predicted by the content.
    delimiter = f"ghadelimiter_{secrets.token_hex(16)}"
    handle.write(f"{name}<<{delimiter}\n{value}\n{delimiter}\n")


async def main() -> None:
    """Entry point for the CI job.

    Reads the base64-encoded diff from the ``GIT_DIFF`` environment variable,
    generates a PR description for it and appends the result to the file named
    by ``GITHUB_OUTPUT`` as the step outputs ``content``, ``debug_diff_length``,
    ``debug_description_length`` and ``debug_preview``.

    Raises:
        ValueError: if ``OPENAI_API_KEY`` or ``GIT_DIFF`` is missing or empty.
        Exception: any other failure is logged and re-raised unchanged.
    """
    if not os.environ.get("OPENAI_API_KEY"):
        raise ValueError("OPENAI_API_KEY environment variable must be set")

    try:
        encoded_diff = os.environ.get("GIT_DIFF", "")
        if not encoded_diff:
            raise ValueError("No diff found in GIT_DIFF environment variable")

        diff_text = _decode_diff(encoded_diff)

        logger.debug(f"Processing diff of length: {len(diff_text)}")
        description = await generate_pr_description(diff_text)

        with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
            _write_multiline_output(f, "content", description)
            f.write(f"debug_diff_length={len(diff_text)}\n")
            f.write(f"debug_description_length={len(description)}\n")
            _write_multiline_output(f, "debug_preview", description[:500])

    except Exception as e:
        # Top-level boundary: record the failure, then let the job fail.
        logger.error(f"Error in main: {e}")
        raise


# Run the async entry point only when the file is executed as a script.
if __name__ == "__main__":
asyncio.run(main())
65 changes: 65 additions & 0 deletions .github/workflows/rigging_pr_description.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
name: Update PR Description with Rigging

on:
pull_request:
types: [opened, synchronize]

jobs:
update-description:
runs-on: ubuntu-latest
permissions:
pull-requests: write
contents: read

steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 #v4.2.2
with:
fetch-depth: 0

# Get the diff first
- name: Get Diff
id: diff
run: |
git fetch origin ${{ github.base_ref }}
MERGE_BASE=$(git merge-base HEAD origin/${{ github.base_ref }})
# Encode the diff as base64 to preserve all characters
DIFF=$(git diff $MERGE_BASE..HEAD | base64 -w 0)
echo "diff=$DIFF" >> $GITHUB_OUTPUT
- uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b #v5.3.0
with:
python-version: "3.11"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip cache purge
pip install pydantic==2.9.1
pip install rigging[all]
# Generate the description using the diff
- name: Generate PR Description
id: description
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PR_NUMBER: ${{ github.event.pull_request.number }}
GIT_DIFF: ${{ steps.diff.outputs.diff }}
run: |
python .github/scripts/rigging_pr_decorator.py
# Update the PR description
- name: Update PR Description
uses: nefrob/pr-description@4dcc9f3ad5ec06b2a197c5f8f93db5e69d2fdca7 #v1.2.0
with:
content: |
## AI-Generated Summary
${{ steps.description.outputs.content }}
---
This summary was generated with ❤️ by [rigging](https://rigging.dreadnode.io/)
regex: ".*"
regexFlags: s
token: ${{ secrets.GITHUB_TOKEN }}
43 changes: 33 additions & 10 deletions .github/workflows/validate_robopages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@ jobs:

- name: Validate Contribution Files
id: robopages-validation
continue-on-error: true
run: |
validate_file() {
local file="$1"
local tmp_file="/tmp/$(basename $file)"
local validation_status=0
if [[ ! "$file" =~ ^([a-zA-Z0-9_\-]+/)*[a-zA-Z0-9_\-]+\.yml$ ]]; then
echo "Invalid file path characters: $file"
Expand All @@ -42,33 +43,56 @@ jobs:
return 1
fi
# Create copy and inject categories if missing
cp "$file" "$tmp_file"
if ! grep -q "categories:" "$tmp_file"; then
# Extract categories from path
categories=$(dirname "$file" | tr '/' '\n' | awk 'NF' | sed 's/^/ - /')
# Inject categories into YAML
echo -e "\ncategories:\n$categories" >> "$tmp_file"
fi
docker pull dreadnode/robopages:latest
# Run validation with Docker socket mounted
# Run validation and capture the exit status
docker run --rm \
-v $(pwd):/workspace \
-v /var/run/docker.sock:/var/run/docker.sock \
-v "$tmp_file:/workspace/$(basename $file)" \
-w /workspace \
--privileged \
dreadnode/robopages:latest validate --path "$(printf '%q' "$file")" --skip-docker
dreadnode/robopages:latest validate --path "$(basename $file)" --skip-docker || validation_status=$?
rm "$tmp_file"
return $validation_status
}
# Initialize overall status
overall_status=0
# Get changed files using GitHub's provided variables
changed_files=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | \
grep '\.yml$' | grep -v '^.github/' || true)
# Validate each changed file
for file in $changed_files; do
echo "Validating $file..."
validate_file "$file" || exit 1
if ! validate_file "$file"; then
overall_status=1
echo "::error::Validation failed for $file"
fi
done
exit $overall_status
- name: Post validation status
if: always()
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #7.0.1
with:
script: |
const validation_status = '${{ steps.robopages-validation.outcome }}' === 'success' ? '✅ Validation successful' : '❌ Validation failed';
const validation_status = process.env.STATE_validation === '0'
? '✅ Validation successful'
: '❌ Validation failed';
const runUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`;
const timestamp = new Date().toISOString();
const body = [
Expand All @@ -78,17 +102,16 @@ jobs:
'',
'Please ensure your contribution follows the required format.',
'',
`🔍 [View Full Validation Details](${runUrl})`,
`[View Full Validation Details](${runUrl})`,
'',
'---',
`Run ID: \`${process.env.GITHUB_RUN_ID}\``,
`Workflow: ${process.env.GITHUB_WORKFLOW}`
].join('\n');
github.rest.pulls.createReview({
github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: context.issue.number,
body: body,
event: 'COMMENT'
issue_number: context.issue.number,
body: body
});
21 changes: 21 additions & 0 deletions cybersecurity/cicd/squealer.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Git clone stage
FROM alpine:latest AS source
RUN apk add --no-cache git
WORKDIR /src
RUN git clone https://github.com/owenrumney/squealer.git . && \
ls -la # Debug: verify files

# Build stage
FROM golang:1.21-alpine AS builder
WORKDIR /build
COPY --from=source /src/ ./
RUN ls -la && \
go mod vendor && \
go build -mod=vendor -ldflags="-w -s" -o squealer ./cmd/squealer

# Final stage
FROM gcr.io/distroless/static-debian12:nonroot
WORKDIR /app
COPY --from=builder /build/squealer /app/
USER nonroot:nonroot
ENTRYPOINT ["/app/squealer"]
37 changes: 37 additions & 0 deletions cybersecurity/cicd/squealer.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
description: Squealer is a tool that finds secrets like keys, tokens and passwords in your code. It scans remote Git repositories for potential credential leaks.

categories:
- cybersecurity
- cicd

functions:
squealer_scan_git_repo:
description: Scan a remote git repository for secrets and credentials
parameters:
repository:
type: string
description: Git repository URL (SSH format)
examples:
- "[email protected]:username/repo.git"
- "[email protected]:owenrumney/woopsie.git"
container:
force: true
image: squealer:latest
cmdline:
- ${repository}

squealer_scan_everything_git_repo:
description: Scan a remote git repository and history for secrets and credentials with everything flag
parameters:
repository:
type: string
description: Git repository URL (SSH format)
examples:
- "[email protected]:username/repo.git"
- "[email protected]:owenrumney/woopsie.git"
container:
force: true
image: squealer:latest
cmdline:
- ${repository}
- --everything
Loading

0 comments on commit a8196b1

Please sign in to comment.