Skip to content

Commit

Permalink
update kfp 'github issue summarization' example (kubeflow#823)
Browse files Browse the repository at this point in the history
* checkpointing

* more updates to keep gh summ pipelines example current
cleanup & update; remove obsolete pipelines
create 'preemptible' version of hosted kfp pipeline
notebook update, readme update

* in notebook, add kernel restart after pip install
minor pipeline cleanup
add archive version of pipeline

* fixed namespace glitch, cleaned up css positioning issue
  • Loading branch information
amygdala authored Oct 6, 2020
1 parent 31a4d5e commit 44d7222
Show file tree
Hide file tree
Showing 17 changed files with 251 additions and 198 deletions.
2 changes: 2 additions & 0 deletions github_issue_summarization/pipelines/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
This Kubeflow Pipelines example shows how to build a web app that summarizes GitHub issues using Kubeflow Pipelines to train and serve a model.
The pipeline trains a [Tensor2Tensor](https://github.com/tensorflow/tensor2tensor/) model on GitHub issue data, learning to predict issue titles from issue bodies. It then exports the trained model and deploys the exported model using [TensorFlow Serving](https://github.com/tensorflow/serving). The final step in the pipeline launches a web app, which interacts with the TF-Serving instance in order to get model predictions.

The example is designed to run on a Hosted KFP installation, installed via the [Cloud Console](https://console.cloud.google.com/ai-platform/pipelines/clusters) or via ['standalone' installation](https://www.kubeflow.org/docs/pipelines/installation/standalone-deployment/) instructions, but would also be straightforward to run on a Kubeflow installation with minor changes.

You can follow this example as a codelab: [g.co/codelabs/kfp-gis](https://g.co/codelabs/kfp-gis).

<!-- Or, you can run it as a [Cloud shell Tutorial](https://console.cloud.google.com/?cloudshell=true&cloudshell_git_repo=https://github.com/kubeflow/examples&working_dir=github_issue_summarization/pipelines&cloudshell_tutorial=tutorial.md). The source for the Cloud Shell tutorial is [here](tutorial.md). -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM ubuntu:16.04
FROM tensorflow/tensorflow:1.15.0-py3

RUN apt-get update -y

RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip

RUN easy_install pip

RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.11.0
RUN pip install pyyaml==3.12 six requests==2.18.4

# RUN apt-get update \
# && apt-get install -y python3-pip python3-dev wget unzip \
# && cd /usr/local/bin \
# && ln -s /usr/bin/python3 python \
# && pip3 install --upgrade pip

# RUN apt-get install -y wget unzip git

# RUN pip install --upgrade pip
# RUN pip install urllib3 certifi retrying
# RUN pip install google-cloud-storage


RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \
unzip -qq google-cloud-sdk.zip -d tools && \
Expand All @@ -34,16 +47,9 @@ RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \
tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \
touch /tools/google-cloud-sdk/lib/third_party/google.py

RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \
tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \
mkdir -p /tools/ks/bin && \
cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \
rm ks_0.11.0_linux_amd64.tar.gz && \
rm -r ks_0.11.0_linux_amd64

ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin
ENV PATH $PATH:/tools/google-cloud-sdk/bin

ADD build /ml

ENTRYPOINT ["python", "/ml/deploy-tf-serve.py"]

Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@ def main():
'If not set, assuming this runs in a GKE container and current ' +
'cluster is used.')
parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.')
parser.add_argument('--namespace', type=str, default='default')
args = parser.parse_args()

KUBEFLOW_NAMESPACE = 'kubeflow'
# KUBEFLOW_NAMESPACE = 'kubeflow'

# Make sure model dir exists before proceeding
retries = 0
Expand Down Expand Up @@ -90,7 +91,7 @@ def main():
with open(target_file, "w") as target:
data = f.read()
changed = data.replace('MODEL_NAME', args.model_name)
changed1 = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE)
changed1 = changed.replace('KUBEFLOW_NAMESPACE', args.namespace)
changed2 = changed1.replace('MODEL_PATH', args.model_path)
target.write(changed2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ spec:
app: MODEL_NAME
type: ClusterIP
---
apiVersion: extensions/v1beta1
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
Expand All @@ -37,16 +37,19 @@ metadata:
namespace: KUBEFLOW_NAMESPACE
spec:
replicas: 1
selector:
matchLabels:
app: MODEL_NAME
template:
metadata:
labels:
app: MODEL_NAME
version: v1
spec:
volumes:
- name: gcp-credentials-user-gcp-sa
secret:
secretName: user-gcp-sa
# volumes:
# - name: gcp-credentials-user-gcp-sa
# secret:
# secretName: user-gcp-sa
containers:
- args:
- --port=9000
Expand All @@ -56,15 +59,15 @@ spec:
command:
- /usr/bin/tensorflow_model_server
image: tensorflow/serving
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /secret/gcp-credentials/user-gcp-sa.json
- name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE
value: /secret/gcp-credentials/user-gcp-sa.json
volumeMounts:
- mountPath: /secret/gcp-credentials
name: gcp-credentials-user-gcp-sa
imagePullPolicy: IfNotPresent
# env:
# - name: GOOGLE_APPLICATION_CREDENTIALS
# value: /secret/gcp-credentials/user-gcp-sa.json
# - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE
# value: /secret/gcp-credentials/user-gcp-sa.json
# volumeMounts:
# - mountPath: /secret/gcp-credentials
# name: gcp-credentials-user-gcp-sa
imagePullPolicy: Always
livenessProbe:
initialDelaySeconds: 30
periodSeconds: 30
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev pyt

RUN easy_install pip

RUN pip install tensorflow-probability==0.5
RUN pip install tensorflow-probability==0.7
RUN pip install tensor2tensor==1.11.0
RUN pip install tensorflow-serving-api
RUN pip install tensorflow-serving-api==1.14.0
RUN pip install gunicorn
RUN pip install pyyaml==3.12 six==1.11.0
RUN pip install pandas
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM ubuntu:16.04
FROM ubuntu:18.04

RUN apt-get update -y
RUN apt-get update \
&& apt-get install -y python3-pip python3-dev wget unzip \
&& cd /usr/local/bin \
&& ln -s /usr/bin/python3 python \
&& pip3 install --upgrade pip

RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip
# RUN apt-get install -y wget unzip git

RUN easy_install pip
RUN pip install --upgrade pip
RUN pip install urllib3 certifi retrying
RUN pip install google-cloud-storage

RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.12.0

RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \
unzip -qq google-cloud-sdk.zip -d tools && \
Expand All @@ -34,14 +39,8 @@ RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \
tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \
touch /tools/google-cloud-sdk/lib/third_party/google.py

RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \
tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \
mkdir -p /tools/ks/bin && \
cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \
rm ks_0.11.0_linux_amd64.tar.gz && \
rm -r ks_0.11.0_linux_amd64

ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin
ENV PATH $PATH:/tools/google-cloud-sdk/bin

ADD build /ml

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
</head>
<body class="text-center">
<form class="form-signin" action="summary" method="post">
<img class="mb-4" src="https://assets-cdn.github.com/images/modules/logos_page/GitHub-Mark.png" alt="" width="72" height="72">
<h1 style="margin-top: -20px;">Github Issue Summarization</h1>
<!-- <img class="mb-4" src="https://assets-cdn.github.com/images/modules/logos_page/GitHub-Mark.png" alt="" width="72" height="72"> -->
<h1 style="margin-top: 20px;">Github Issue Summarization</h1>
<p style="margin-left: 20%; margin-right: 20%;">This app takes as input a Github issue body and predicts a title for it. Behind the scenes it uses a
<a href="https://github.com/tensorflow/tensor2tensor" target="_blank">Tensor2Tensor</a> TensorFlow model, served via <a href="https://github.com/tensorflow/serving/" target="_blank">TF-Serving </a>.</p>
<p>(Thanks to <a href="https://github.com/hamelsmu" target="_blank">Hamel Husain</a> for the original concept and source data.)</p>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@ def main():
'If not set, assuming this runs in a GKE container and current ' +
'cluster is used.')
parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.')
parser.add_argument('--namespace', type=str, default='default')
args = parser.parse_args()

KUBEFLOW_NAMESPACE = 'kubeflow'
# KUBEFLOW_NAMESPACE = 'kubeflow'

print("using model name: %s and namespace: %s" % (args.model_name, KUBEFLOW_NAMESPACE))
print("using model name: %s and namespace: %s" % (args.model_name, args.namespace))

logging.getLogger().setLevel(logging.INFO)
args_dict = vars(args)
Expand Down Expand Up @@ -70,7 +71,7 @@ def main():
with open(target_file, "w") as target:
data = f.read()
changed = data.replace('MODEL_NAME', args.model_name)
changed1 = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE).replace(
changed1 = changed.replace('KUBEFLOW_NAMESPACE', args.namespace).replace(
'GITHUB_TOKEN', args.github_token).replace(
'DATA_DIR', 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/')
target.write(changed1)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,27 +1,3 @@
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: webapp-MODEL_NAME
spec:
gateways:
- kubeflow-gateway
hosts:
- '*'
http:
- match:
- uri:
prefix: /webapp/
rewrite:
uri: /
route:
- destination:
host: MODEL_NAME-webappsvc.KUBEFLOW_NAMESPACE.svc.cluster.local
port:
number: 80
timeout: 300s

---

apiVersion: v1
kind: Service
metadata:
Expand Down Expand Up @@ -50,33 +26,36 @@ spec:

---

apiVersion: extensions/v1beta1
apiVersion: apps/v1
kind: Deployment
metadata:
name: MODEL_NAME-webapp
spec:
replicas: 1
selector:
matchLabels:
app: ghsumm
template:
metadata:
labels:
app: ghsumm
role: frontend
spec:
volumes:
- name: gcp-credentials-user-gcp-sa
secret:
secretName: user-gcp-sa
# volumes:
# - name: gcp-credentials-user-gcp-sa
# secret:
# secretName: user-gcp-sa
containers:
- name: MODEL_NAME-webapp
image: gcr.io/google-samples/ml-pipeline-t2tapp:v3ap
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /secret/gcp-credentials/user-gcp-sa.json
- name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE
value: /secret/gcp-credentials/user-gcp-sa.json
volumeMounts:
- mountPath: /secret/gcp-credentials
name: gcp-credentials-user-gcp-sa
image: gcr.io/google-samples/ml-pipeline-t2tapp:vap9
# env:
# - name: GOOGLE_APPLICATION_CREDENTIALS
# value: /secret/gcp-credentials/user-gcp-sa.json
# - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE
# value: /secret/gcp-credentials/user-gcp-sa.json
# volumeMounts:
# - mountPath: /secret/gcp-credentials
# name: gcp-credentials-user-gcp-sa
# resources:
# limits:
# nvidia.com/gpu: 1
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import kfp.dsl as dsl
import kfp.gcp as gcp  # NOTE(review): appears unused in this file — confirm before removing
import kfp.components as comp


# Action strings understood by the reusable t2t component: they select which
# mode ('copy_data' vs 'train') the component's container entrypoint runs in.
COPY_ACTION = 'copy_data'
TRAIN_ACTION = 'train'
# NOTE(review): DATASET and MODEL are not referenced below — presumably kept
# for parity with sibling pipeline variants; confirm before removing.
DATASET = 'dataset'
MODEL = 'model'

# Reusable pipeline components, loaded from their published component specs
# on the kubeflow/examples master branch.
copydata_op = comp.load_component_from_url(
  'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/datacopy_component.yaml' # pylint: disable=line-too-long
  )

train_op = comp.load_component_from_url(
  'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/train_component.yaml' # pylint: disable=line-too-long
  )


@dsl.pipeline(
  name='Github issue summarization',
  description='Demonstrate Tensor2Tensor-based training and TF-Serving'
)
def gh_summ( #pylint: disable=unused-argument
  train_steps: 'Integer' = 2019300,
  project: str = 'YOUR_PROJECT_HERE',
  github_token: str = 'YOUR_GITHUB_TOKEN_HERE',
  working_dir: 'GCSPath' = 'gs://YOUR_GCS_DIR_HERE',
  checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',
  deploy_webapp: str = 'true',
  data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'
  ):
  """Train a Tensor2Tensor GitHub issue-summarization model, serve it via
  TF-Serving, and conditionally launch a demo web app.

  Steps: (1) copy pretrained checkpoints and data into a run-specific model
  dir under ``working_dir``; (2) continue training from those checkpoints for
  ``train_steps`` steps (on one GPU); (3) deploy the exported model with
  TF-Serving in the 'default' namespace; (4) if the train step reports
  ``launch_server == 'true'``, deploy the web front end.

  Args (all become pipeline parameters with these defaults):
    train_steps: total step count the training job runs to; the default is
      slightly past the checkpoint's step count, so only a short increment
      of training is actually performed.
    project: GCP project id (placeholder to be overridden at run time).
    github_token: GitHub API token passed to the web app for issue fetching.
    working_dir: GCS path under which per-run model output is written.
    checkpoint_dir: GCS path of the pretrained model checkpoints to start from.
    deploy_webapp: 'true'/'false' string forwarded to the train component,
      which re-emits it as the ``launch_server`` output.
    data_dir: GCS path of the preprocessed t2t training data.
  """

  # Stage the training data/checkpoints into a model dir unique to this run
  # (RUN_ID_PLACEHOLDER is resolved to the run id at execution time).
  copydata = copydata_op(
    data_dir=data_dir,
    checkpoint_dir=checkpoint_dir,
    model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER),
    action=COPY_ACTION,
    )

  # Train starting from the copied checkpoints; the component echoes
  # deploy_webapp back out as the 'launch_server' output used below.
  train = train_op(
    data_dir=data_dir,
    model_dir=copydata.outputs['copy_output_path'],
    action=TRAIN_ACTION, train_steps=train_steps,
    deploy_webapp=deploy_webapp
    )

  # Deploy the trained model with TF-Serving. The model name embeds the run
  # id so concurrent runs don't collide; namespace matches the deploy
  # scripts' new --namespace default of 'default'.
  serve = dsl.ContainerOp(
      name='serve',
      image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve:v6',
      arguments=["--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),
          "--model_path", train.outputs['train_output_path'], "--namespace", 'default'
          ]
      )

  # Request a single GPU for the training step.
  train.set_gpu_limit(1)

  # Only launch the demo web app when the train step signalled it should be
  # deployed; it must also wait for the serving deployment to exist.
  with dsl.Condition(train.outputs['launch_server'] == 'true'):
    webapp = dsl.ContainerOp(
        name='webapp',
        image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v1',
        arguments=["--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),
            "--github_token", github_token, "--namespace", 'default']

        )
    webapp.after(serve)


if __name__ == '__main__':
  # Compile the pipeline to an archive next to this file for upload to KFP.
  import kfp.compiler as compiler
  compiler.Compiler().compile(gh_summ, __file__ + '.tar.gz')
Binary file not shown.
Loading

0 comments on commit 44d7222

Please sign in to comment.