Add --host=0.0.0.0 if running llama.cpp serve within a container #444

Merged (1 commit) on Nov 12, 2024
docs/ramalama-serve.1.md (3 additions, 0 deletions)

@@ -38,6 +38,9 @@ Generate specified configuration format for running the AI Model as a service
 #### **--help**, **-h**
 show this help message and exit
 
+#### **--host**="0.0.0.0"
+ip address to listen
+
 #### **--name**, **-n**
 Name of the container to run the Model in.
 
ramalama/cli.py (1 addition, 0 deletions)

@@ -614,6 +614,7 @@ def serve_parser(subparsers):
     parser = subparsers.add_parser("serve", help="serve REST API on specified AI Model")
     parser.add_argument("--authfile", help="path of the authentication file")
     parser.add_argument("-d", "--detach", action="store_true", dest="detach", help="run the container in detached mode")
+    parser.add_argument("--host", default=config.get('host', "0.0.0.0"), help="ip address to listen")
     parser.add_argument("-n", "--name", dest="name", help="name of container in which the Model will be run")
     parser.add_argument(
         "-p", "--port", default=config.get('port', "8080"), help="port for AI Model server to listen on"
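For illustration, the new option follows the same pattern as the existing --port argument: the argparse default is read from the user's configuration and falls back to 0.0.0.0. Below is a minimal standalone sketch of that default resolution; the bare config dict and the parser built here are stand-ins for illustration, not ramalama's actual module layout.

import argparse

# Stand-in for ramalama's loaded configuration, e.g. {"host": "127.0.0.1"}.
config = {}

parser = argparse.ArgumentParser(prog="ramalama serve")
parser.add_argument("--host", default=config.get("host", "0.0.0.0"), help="ip address to listen")
parser.add_argument("-p", "--port", default=config.get("port", "8080"), help="port for AI Model server to listen on")

args = parser.parse_args(["--host", "127.0.0.1"])
print(args.host)  # "127.0.0.1"; with no flag and no config entry this would print "0.0.0.0"

Passing --host on the command line overrides both the configured value and the built-in default.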
ramalama/model.py (7 additions, 5 deletions)

@@ -267,9 +267,6 @@ def run(self, args):
         if not args.container:
             exec_model_path = model_path
 
-        # if args.container:
-        #     model_path = mnt_file
-
         exec_args = ["llama-cli", "-m", exec_model_path, "--in-prefix", "", "--in-suffix", ""]
 
         if not args.debug:
@@ -289,6 +286,9 @@
         try:
             if self.exec_model_in_container(model_path, exec_args, args):
                 return
+            if args.dryrun:
+                dry_run(exec_args)
+                return
             exec_cmd(exec_args, args.debug, debug=args.debug)
         except FileNotFoundError as e:
             if in_container():

A reviewer (Contributor) commented on the added dryrun handling:

suggestion: Consider extracting duplicate dryrun handling logic into a helper method

The dryrun handling logic is duplicated between run() and serve(). Consider creating a helper method to reduce code duplication.

    def _handle_dryrun(self, exec_args):
        dry_run(exec_args)
        return True

    if args.dryrun:
        return self._handle_dryrun(exec_args)
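If the reviewer's suggestion were adopted, run() and serve() would share one helper. A minimal sketch of that idea, not code from this PR; dry_run() and the method bodies below are simplified stand-ins:

def dry_run(exec_args):
    # stand-in for ramalama's dry_run(): print the command instead of executing it
    print(" ".join(exec_args))

class Model:
    def _handle_dryrun(self, exec_args):
        dry_run(exec_args)
        return True

    def run(self, args, exec_args):
        if args.dryrun:
            return self._handle_dryrun(exec_args)
        # otherwise exec_cmd(exec_args, ...) would run here

    def serve(self, args, exec_args):
        if args.dryrun:
            return self._handle_dryrun(exec_args)
        # otherwise exec_cmd(exec_args, ...) would run here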
@@ -317,8 +317,7 @@ def serve(self, args):
         else:
             if args.gpu:
                 exec_args.extend(self.gpu_args())
-        if in_container():
-            exec_args.extend(["--host", "0.0.0.0"])
+        exec_args.extend(["--host", args.host])
 
         if args.generate == "quadlet":
             return self.quadlet(model_path, args, exec_args)
@@ -332,6 +331,9 @@
         try:
             if self.exec_model_in_container(model_path, exec_args, args):
                 return
+            if args.dryrun:
+                dry_run(exec_args)
+                return
             exec_cmd(exec_args, debug=args.debug)
         except FileNotFoundError as e:
             if in_container():
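To summarize the behavioral change in serve(): previously --host 0.0.0.0 was appended only when running inside a container, while now the user-controlled args.host (defaulting to 0.0.0.0) is always appended. A small sketch under those assumptions, using stand-in functions rather than ramalama's actual code:

def host_args_old(running_in_container: bool) -> list:
    # before this PR: --host was forced to 0.0.0.0 only inside a container,
    # so a bare "ramalama serve" on the host kept llama.cpp's own default
    return ["--host", "0.0.0.0"] if running_in_container else []

def host_args_new(host: str = "0.0.0.0") -> list:
    # after this PR: the --host value from the CLI or config (default 0.0.0.0)
    # is always appended, in or out of a container
    return ["--host", host]

assert host_args_old(running_in_container=False) == []
assert host_args_new() == ["--host", "0.0.0.0"]
assert host_args_new("127.0.0.1") == ["--host", "127.0.0.1"]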
test/system/030-run.bats (23 additions, 17 deletions)

@@ -3,31 +3,37 @@
 load helpers
 
 @test "ramalama --dryrun run basic output" {
-    skip_if_nocontainer
-
     model=tiny
     image=m_$(safename)
 
-    run_ramalama info
-    conman=$(jq .Engine <<< $output | tr -d '"' )
-    verify_begin="${conman} run --rm -i --label RAMALAMA --security-opt=label=disable --name"
+    if is_container; then
+        run_ramalama info
+        conman=$(jq .Engine <<< $output | tr -d '"' )
+        verify_begin="${conman} run --rm -i --label RAMALAMA --security-opt=label=disable --name"
 
-    run_ramalama --dryrun run ${model}
-    is "$output" "${verify_begin} ramalama_.*" "dryrun correct"
-    is "$output" ".*${model}" "verify model name"
+        run_ramalama --dryrun run ${model}
+        is "$output" "${verify_begin} ramalama_.*" "dryrun correct"
+        is "$output" ".*${model}" "verify model name"
 
-    run_ramalama --dryrun run --name foobar ${model}
-    is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
-    is "$output" ".*${model}" "verify model name"
+        run_ramalama --dryrun run --name foobar ${model}
+        is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
+        is "$output" ".*${model}" "verify model name"
 
-    run_ramalama --dryrun run --name foobar ${model}
-    is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
+        run_ramalama --dryrun run --name foobar ${model}
+        is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
 
-    run_ramalama 1 --nocontainer run --name foobar tiny
-    is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
+        run_ramalama 1 --nocontainer run --name foobar tiny
+        is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
 
-    RAMALAMA_IMAGE=${image} run_ramalama --dryrun run ${model}
-    is "$output" ".*${image} /bin/sh -c" "verify image name"
+        RAMALAMA_IMAGE=${image} run_ramalama --dryrun run ${model}
+        is "$output" ".*${image} /bin/sh -c" "verify image name"
+    else
+        run_ramalama --dryrun run ${model}
+        is "$output" 'llama-cli -m /path/to/model --in-prefix --in-suffix --no-display-prompt -p.*' "dryrun correct"
+
+        run_ramalama 1 run --name foobar tiny
+        is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
+    fi
 }
 
 @test "ramalama run tiny with prompt" {
test/system/040-serve.bats (28 additions, 16 deletions)

@@ -7,24 +7,36 @@ load setup_suite
 verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name"
 
 @test "ramalama --dryrun serve basic output" {
-    skip_if_nocontainer
-
     model=m_$(safename)
 
-    run_ramalama --dryrun serve ${model}
-    is "$output" "${verify_begin} ramalama_.*" "dryrun correct"
-    is "$output" ".*${model}" "verify model name"
-
-    run_ramalama --dryrun serve --name foobar ${model}
-    is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
-    is "$output" ".*${model}" "verify model name"
-
-    run_ramalama 1 serve --name foobar MODEL
-    is "$output" ".*Error: failed to pull .*MODEL" "dryrun correct with --name"
-
-    run_ramalama 1 --nocontainer serve --name foobar tiny
-    is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
-    run_ramalama stop --all
+    if is_container; then
+        run_ramalama --dryrun serve ${model}
+        is "$output" "${verify_begin} ramalama_.*" "dryrun correct"
+        is "$output" ".*${model}" "verify model name"
+
+        run_ramalama --dryrun serve --name foobar ${model}
+        is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
+        assert "$output" =~ ".*--host 0.0.0.0" "verify host 0.0.0.0 is added when run within container"
+        is "$output" ".*${model}" "verify model name"
+
+        run_ramalama --dryrun serve --host 127.1.2.3 --name foobar ${model}
+        assert "$output" =~ ".*--host 127.1.2.3" "verify --host is modified when run within container"
+        is "$output" ".*${model}" "verify model name"
+
+        run_ramalama 1 --nocontainer serve --name foobar tiny
+        is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
+        run_ramalama stop --all
+    else
+        run_ramalama --dryrun serve ${model}
+        assert "$output" =~ ".*--host 0.0.0.0" "Outside container sets host to 0.0.0.0"
+        run_ramalama --dryrun serve --host 127.0.0.1 ${model}
+        assert "$output" =~ ".*--host 127.0.0.1" "Outside container overrides host to 127.0.0.1"
+        run_ramalama 1 --nocontainer serve --name foobar tiny
+        is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
+    fi
+
+    run_ramalama 1 serve MODEL
+    is "$output" ".*Error: failed to pull .*MODEL" "failed to pull model"
 }
 
 @test "ramalama --detach serve" {