-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathllm.sh
executable file
·352 lines (314 loc) · 11 KB
/
llm.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
#!/usr/bin/env bash
# LLM Prompt Generator Script
# Author: Your Name
# Contact Information: Your Contact Info
# Description:
# This script generates an LLM prompt by aggregating specified files and directories.
# It inserts a '# Prompt' section at the top of the llm.md file for the user to input instructions.
# If the script is being run inside a VSCode devcontainer (REMOTE_CONTAINERS or CODESPACES is non-empty)
# and the required environment variables LLM_SH_OPENAI_KEY and LLM_SH_OPENAI_MODEL are set,
# it will send the entire updated llm.md to the OpenAI API.
# The model's response is then inserted into llm.md after the user's prompt and before the '# Files' section.
# Code History:
# - [Date]: Initial script creation.
# - [Date]: Added functionality to interact with OpenAI API when environment variables are set.
# - [Date]: Updated script to generate content before prompting the user for instructions.
# - [Date]: Ensured script operates without environment variables, generating output as before.
# - [Date]: Fixed 'Argument list too long' error by using --rawfile instead of --arg with jq.
# - [Date]: Fixed 'Argument list too long' error with curl by writing request data to a temporary file.
# - [Date]: Fixed unclosed quote error in reduce_yaml_file function.
# --- JSON/YAML reduction tunables -------------------------------------------
# Each may be pre-set in the environment; the values below apply only when the
# variable is unset or empty.
: "${JSON_MAX_SIZE:=20480}"   # size threshold in bytes (20 KiB)
: "${JSON_MAX_DEPTH:=10}"     # how many nesting levels the reducers descend
# File names that must never be reduced. The expansion is left unquoted on
# purpose so a whitespace-separated string is split into array elements;
# the result is an empty array when nothing was supplied.
JSON_DONT_MODIFY=(${JSON_DONT_MODIFY[@]})

# --- What goes into the prompt ----------------------------------------------
# Directories that are searched recursively for files.
INCLUDE_DIRS=("src" ".devcontainer" ".github")
# Individual files that are always considered.
INCLUDE_FILES=("Dockerfile" "lexicon.json" "prompt.md" "demo.xml" "README.md")
# File extensions that are skipped.
IGNORE_EXTENSIONS=("svg" "jpg" "png" "gif" "pdf" "zip" "tar" "gz")

# --- Output locations ---------------------------------------------------------
OUTPUT_FILE="llm.md"
TEMP_FILE="${OUTPUT_FILE}.tmp"

# Start from a fresh output file that contains a single blank line.
printf '\n' > "$OUTPUT_FILE"
# Append the fixed preamble to the output file: the "Requirements" section,
# a "Context" section carrying today's date, and finally the "# Files"
# heading under which the collected files are listed. One printf call emits
# every argument on its own line.
printf '%s\n' \
  "# Requirements:" \
  "" \
  "## Language" \
  "" \
  "Always write in Australian English" \
  "" \
  "## Responses" \
  "" \
  "When refactoring or making changes to code, respond with complete, operable, files. Do not use placeholders to represent existing code that the user will need to replace." \
  "" \
  "## Technical and Coding Proficiency:" \
  "When providing code examples and revisions, adhere strictly to the relevant Google Style Guide ie For Python, follow the Google Python Style Guide; for Bash, follow the Google Bash Style Guide, etc. Furthermore:" \
  "1. **All code must be Google Style Guide compliant where one exists, best practice if not**." \
  "2. **All code must be fully typed in languages that support typing**, including variables." \
  "3. **When typing, the \`Any\` type must be avoided**. If it is required, detailed comments explaining why must be provided." \
  "4. **All code must be broken into the smallest logical functional components**." \
  "5. **Classes should be used where appropriate for functionality**." \
  "6. **All reasonable exceptions must be caught and handled**, including cleanup where appropriate." \
  "7. **All reasonable signals (including TERM, KILL, HUP, etc.) must be caught and handled appropriately**, including cleanup where appropriate." \
  "8. **All code must be very well documented inline**." \
  "9. **Examples should be included in comments where appropriate**." \
  "10. **When creating new files**, an appropriate **file header should be included**:" \
  " - The purpose and description of the file." \
  " - The author's name and contact information." \
  " - Code history and changes." \
  "11. **When creating a new file that is intended to be executed**, it should use the \`env\` shebang method:" \
  " \`\`\`python" \
  " #!/usr/bin/env python3" \
  " \`\`\`" \
  "12. Ensure all imports/includes are referenced in the code; do not import/include if not needed." \
  "" \
  "# Context" \
  "" \
  "## Date" \
  "" \
  "Today is $(date '+%A, %d %B %Y')" \
  "" \
  "# Files" \
  >> "$OUTPUT_FILE"
#######################################
# Append one file to the prompt document as a fenced Markdown code block.
#
# The block is preceded by a "## <path>" heading and the fence is tagged with
# the file's extension. JSON and YAML files larger than JSON_MAX_SIZE bytes
# are passed through reduce_json_file / reduce_yaml_file unless their base
# name is listed in JSON_DONT_MODIFY; everything else is copied verbatim.
#
# Globals:
#   JSON_DONT_MODIFY (read)  - base names that must not be reduced
#   JSON_MAX_SIZE    (read)  - size threshold in bytes (20480 if unset)
#   OUTPUT_FILE      (read)  - file that is appended to
# Arguments:
#   $1 - path of the file to add
# Outputs:
#   Appends to OUTPUT_FILE; writes a warning to stderr if reduction fails.
#######################################
process_file() {
  local file_path="$1"
  local file_name
  file_name="$(basename "$file_path")"
  # Take the extension from the base name only. Using the whole path would
  # pick up dots in directory names, e.g. ".github/CODEOWNERS" would yield
  # "github/CODEOWNERS" as its extension.
  local file_extension="${file_name##*.}"
  local file_size
  local dont_modify=false
  local dont_modify_file
  local reducer
  local reduced

  # Check if the file is in JSON_DONT_MODIFY
  for dont_modify_file in "${JSON_DONT_MODIFY[@]}"; do
    if [[ "$file_name" == "$dont_modify_file" ]]; then
      dont_modify=true
      break
    fi
  done

  # Append headings to the output file
  {
    echo ""
    echo "## ${file_path}"
    echo ""
    echo "\`\`\`${file_extension}"
  } >> "$OUTPUT_FILE"

  # Process JSON and YAML files for size reduction
  if [[ "$file_extension" == "json" || "$file_extension" == "yaml" || "$file_extension" == "yml" ]]; then
    # wc -c is used instead of stat because stat's options differ between
    # GNU and BSD userlands.
    file_size=$(wc -c < "$file_path")
    if (( file_size > ${JSON_MAX_SIZE:-20480} )) && [[ "$dont_modify" == false ]]; then
      if [[ "$file_extension" == "json" ]]; then
        reducer=reduce_json_file
      else
        reducer=reduce_yaml_file
      fi
      # Capture the reducer's output first so that a failing reducer (tool
      # missing, unparsable input) does not leave an empty code block behind.
      if reduced="$("$reducer" "$file_path")"; then
        printf '%s\n' "$reduced" >> "$OUTPUT_FILE"
      else
        echo "Warning: could not reduce ${file_path}; including it unmodified." >&2
        cat "$file_path" >> "$OUTPUT_FILE"
      fi
    else
      cat "$file_path" >> "$OUTPUT_FILE"
    fi
  else
    cat "$file_path" >> "$OUTPUT_FILE"
  fi

  # Close the code block
  {
    echo ""
    echo "\`\`\`"
    echo ""
  } >> "$OUTPUT_FILE"
}
#######################################
# Print a size-reduced copy of a JSON document.
#
# Every array with more than two elements is cut down to its first two
# elements followed by the marker string "... truncated ...". The walk
# descends through objects and arrays, including the two elements that are
# kept, and stops changing anything once the depth budget reaches zero.
#
# Globals:
#   JSON_MAX_DEPTH (read) - depth budget; 10 is used when unset or empty
# Arguments:
#   $1 - path to the JSON file
# Outputs:
#   The reduced JSON document on stdout.
# Returns:
#   jq's exit status.
#######################################
reduce_json_file() {
  local file_path="$1"
  local depth="${JSON_MAX_DEPTH:-10}"
  jq --argjson depth "$depth" '
    def truncate($d):
      if $d == 0 then
        .
      elif type == "array" then
        if length > 2 then
          # Reduce the retained elements as well; otherwise a huge nested
          # array inside one of them would be kept in full.
          (.[:2] | map(truncate($d - 1))) + ["... truncated ..."]
        else
          map(truncate($d - 1))
        end
      elif type == "object" then
        with_entries(.value |= truncate($d - 1))
      else
        .
      end;
    truncate($depth)
  ' "$file_path"
}
#######################################
# Print a size-reduced copy of a YAML document.
#
# Every sequence with more than two entries that sits fewer than
# JSON_MAX_DEPTH levels below the document root is cut down to its first two
# entries followed by the marker string "... truncated ...".
#
# NOTE(review): this assumes the Go implementation of yq (mikefarah, v4),
# which is what the `yq eval` sub-command belongs to. As far as its
# documentation shows, that expression language has no user-defined
# functions, so the depth limit is expressed through the length of each
# node's path rather than through a recursive helper - confirm against the
# yq version installed in the devcontainer.
#
# Globals:
#   JSON_MAX_DEPTH (read) - depth budget; 10 is used when unset or empty
# Arguments:
#   $1 - path to the YAML file
# Outputs:
#   The reduced YAML document on stdout.
# Returns:
#   yq's exit status.
#######################################
reduce_yaml_file() {
  local file_path="$1"
  local depth="${JSON_MAX_DEPTH:-10}"
  # The depth is handed over through the environment and read with env(),
  # which avoids splicing shell text into the yq expression.
  LLM_SH_YAML_DEPTH="$depth" yq eval '
    (
      ..
      | select(
          (tag == "!!seq")
          and (length > 2)
          and ((path | length) < env(LLM_SH_YAML_DEPTH))
        )
    ) |= (.[0:2] + ["... truncated ..."])
  ' "$file_path"
}
#######################################
# Decide whether a file is skipped because of its extension.
#
# A file is ignored when the text after the last dot of its path equals one
# of IGNORE_EXTENSIONS, unless the path is listed verbatim in INCLUDE_FILES.
#
# Globals:
#   IGNORE_EXTENSIONS (read) - extensions to skip
#   INCLUDE_FILES     (read) - paths that are never ignored
# Arguments:
#   $1 - path of the file to check
# Returns:
#   0 if the file is ignored, 1 otherwise.
#######################################
is_ignored() {
  local file_path="$1"
  local file_extension="${file_path##*.}"
  # Loop variables are declared local so they do not leak into, or clobber
  # variables of, the caller's scope.
  local ext
  local include_file

  # Check against ignored extensions
  for ext in "${IGNORE_EXTENSIONS[@]}"; do
    if [[ "$file_extension" == "$ext" ]]; then
      # An explicitly included file wins over the extension filter.
      for include_file in "${INCLUDE_FILES[@]}"; do
        if [[ "$file_path" == "$include_file" ]]; then
          return 1 # Not ignored
        fi
      done
      return 0 # Ignored
    fi
  done
  return 1 # Not ignored
}
#######################################
# Report whether a file looks binary.
#
# grep is run with -I, which makes it treat a binary file as containing no
# match. A file in which grep cannot match any character is therefore
# reported as binary; that includes files with no characters to match.
#
# Arguments:
#   $1 - path of the file to inspect
# Returns:
#   0 if the file is considered binary, 1 if it is text.
#######################################
is_binary() {
  local candidate="$1"
  # A successful match means text was found, so the file is not binary.
  grep -qI "." "$candidate" && return 1
  return 0
}
# Walk every configured directory that exists and add each regular file found
# beneath it, leaving out anything under a __pycache__ directory, anything
# filtered by extension and anything that looks binary.
for dir in "${INCLUDE_DIRS[@]}"; do
  [[ -d "$dir" ]] || continue
  find "$dir" -type f ! -path "*/__pycache__/*" | while IFS= read -r file; do
    if is_ignored "$file" || is_binary "$file"; then
      continue
    fi
    process_file "$file"
  done
done

# Then add the individually listed files, applying the same filters.
for file in "${INCLUDE_FILES[@]}"; do
  [[ -f "$file" ]] || continue
  if is_ignored "$file" || is_binary "$file"; then
    continue
  fi
  process_file "$file"
done
# Put a "# Prompt" section in front of everything generated so far: write the
# section followed by the current document into the temporary file, then move
# the temporary file over the output file.
{
  printf '%s\n' \
    "# Prompt" \
    "" \
    "[Write your instructions here. For example: \"Add functionality to my app that checks the stock market every five minutes.\"]" \
    ""
  cat "$OUTPUT_FILE"
} > "$TEMP_FILE"
mv "$TEMP_FILE" "$OUTPUT_FILE"
# Open the generated file in VSCode. The script stops with exit status 1 when
# the 'code' command is not available; the diagnostic goes to stderr so it is
# not mixed into regular output.
if command -v code >/dev/null 2>&1; then
  code "$OUTPUT_FILE"
else
  echo "Error: VSCode command 'code' not found." >&2
  exit 1
fi
# When REMOTE_CONTAINERS or CODESPACES is non-empty and both
# LLM_SH_OPENAI_KEY and LLM_SH_OPENAI_MODEL are set: wait until the user has
# saved llm.md, send the whole file to the OpenAI chat completions endpoint
# and insert the reply ahead of the "# Files" section. Otherwise only report
# where the prompt file was written.
if [[ (-n "$REMOTE_CONTAINERS" || -n "$CODESPACES") && -n "$LLM_SH_OPENAI_KEY" && -n "$LLM_SH_OPENAI_MODEL" ]]; then
  # Wait for the user to edit and save the llm.md file by polling its
  # modification time once per second.
  initial_mod_time=$(stat -c %Y "$OUTPUT_FILE")
  echo "Waiting for you to edit and save llm.md..."
  while true; do
    sleep 1
    new_mod_time=$(stat -c %Y "$OUTPUT_FILE")
    if [[ "$new_mod_time" != "$initial_mod_time" ]]; then
      echo "llm.md has been modified."
      break
    fi
  done

  # The request for this model is built without a system message.
  MODEL="$LLM_SH_OPENAI_MODEL"
  if [[ "$MODEL" == "o1-preview-2024-09-12" ]]; then
    NO_SYSTEM_PROMPT=true
  else
    NO_SYSTEM_PROMPT=false
  fi

  # Prepare the request payload in a temporary file. The payload is passed to
  # jq and curl by file name because it can exceed the argument length limit.
  if ! REQUEST_FILE=$(mktemp); then
    echo "Error: Failed to create a temporary request file." >&2
    exit 1
  fi
  # Make sure the temporary file is removed on every exit path.
  trap 'rm -f -- "$REQUEST_FILE"' EXIT

  if [[ "$NO_SYSTEM_PROMPT" == "true" ]]; then
    jq -n \
      --arg model "$MODEL" \
      --rawfile content "$OUTPUT_FILE" \
      '{
        "model": $model,
        "messages": [
          {
            "role": "user",
            "content": $content
          }
        ]
      }' > "$REQUEST_FILE"
  else
    # You can set a system prompt here if needed
    SYSTEM_PROMPT="You are an assistant that helps with code and technical tasks."
    jq -n \
      --arg model "$MODEL" \
      --arg system_prompt "$SYSTEM_PROMPT" \
      --rawfile content "$OUTPUT_FILE" \
      '{
        "model": $model,
        "messages": [
          {
            "role": "system",
            "content": $system_prompt
          },
          {
            "role": "user",
            "content": $content
          }
        ]
      }' > "$REQUEST_FILE"
  fi

  # An empty payload means jq could not build the request; do not send it.
  if [[ ! -s "$REQUEST_FILE" ]]; then
    echo "Error: Failed to build the request payload." >&2
    exit 1
  fi

  # Send the content to the OpenAI API using the temporary file
  echo "Sending your prompt to the OpenAI API..."
  if ! RESPONSE=$(curl -sS https://api.openai.com/v1/chat/completions \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $LLM_SH_OPENAI_KEY" \
    --data-binary @"$REQUEST_FILE"); then
    echo "Error: Request to the OpenAI API failed." >&2
    exit 1
  fi

  # Remove the temporary request file
  rm -f -- "$REQUEST_FILE"
  trap - EXIT

  # Extract the assistant's response. jq -e exits non-zero when the selected
  # value is null or the body is not valid JSON, so an empty or malformed
  # reply is rejected as well as an explicit null.
  if ! assistant_content=$(printf '%s' "$RESPONSE" | jq -er '.choices[0].message.content') \
    || [[ -z "$assistant_content" ]]; then
    echo "Error: Failed to get a valid response from the OpenAI API." >&2
    echo "Response from API:" >&2
    printf '%s\n' "$RESPONSE" >&2
    exit 1
  fi

  # Insert the assistant's response into llm.md after the user's prompt and
  # before the '# Files' section. The pattern is anchored to a whole line so
  # that text such as "## Files" inside the prompt is not taken for the marker.
  {
    # Extract the content before '# Files'
    sed '/^# Files[[:space:]]*$/,$d' "$OUTPUT_FILE"
    echo ""
    echo "# Assistant's Response"
    echo ""
    printf '%s\n' "$assistant_content"
    echo ""
    # Include the '# Files' section and everything after
    sed -n '/^# Files[[:space:]]*$/,$p' "$OUTPUT_FILE"
  } > "$TEMP_FILE"

  # Replace the OUTPUT_FILE with TEMP_FILE
  mv "$TEMP_FILE" "$OUTPUT_FILE"
  echo "Assistant's response has been added to $OUTPUT_FILE."
else
  echo "LLM prompt file has been generated at ${OUTPUT_FILE}"
fi