diff --git a/01_pinot/Makefile b/01_pinot/Makefile
index e498655..d58c2af 100644
--- a/01_pinot/Makefile
+++ b/01_pinot/Makefile
@@ -57,12 +57,12 @@ import:
 
 validate:
 	@echo "\n🍷 Getting cluster info..."
-	@curl -sX GET http://localhost:9000/cluster/info -H 'accept: application/json' | jq .
+	@curl -sX GET http://localhost:9000/cluster/info -H 'accept: application/json'
 	@echo "\n🍷 Getting Schemas..."
 	@SCHEMAS=$$(curl -sX 'GET' \
 		'http://localhost:9000/schemas' \
-		-H 'accept: application/json' | jq .); \
+		-H 'accept: application/json' ); \
 	if echo "$$SCHEMAS" | grep -q "movies"; then \
 		echo "Schema 'movies' found."; \
 	else \
diff --git a/02_pinot-kafka/Makefile b/02_pinot-kafka/Makefile
index aec40a0..603c944 100644
--- a/02_pinot-kafka/Makefile
+++ b/02_pinot-kafka/Makefile
@@ -88,12 +88,12 @@ import:
 
 validate:
 	@echo "\n🍷 Getting cluster info..."
-	@curl -sX GET http://localhost:9000/cluster/info -H 'accept: application/json' | jq .
+	@curl -sX GET http://localhost:9000/cluster/info -H 'accept: application/json'
 	@echo "\n🍷 Getting Schemas..."
 	@SCHEMAS=$$(curl -sX 'GET' \
 		'http://localhost:9000/schemas' \
-		-H 'accept: application/json' | jq .); \
+		-H 'accept: application/json'); \
 	if echo "$$SCHEMAS" | grep -q "movie_ratings"; then \
 		echo "Schema 'movie_ratings' found."; \
 	else \
diff --git a/02_pinot-kafka/README.adoc b/02_pinot-kafka/README.adoc
index 751b850..3c698f8 100644
--- a/02_pinot-kafka/README.adoc
+++ b/02_pinot-kafka/README.adoc
@@ -70,7 +70,7 @@ order by r.rating desc
 limit 10
 ----
-== Clean Up
+==== Clean Up
 * To stop and remove all services related to this part of the workshop, run:
 +
 [source,bash]
@@ -78,7 +78,7 @@ order by r.rating desc
 make destroy
 ----
-== Troubleshooting
+==== Troubleshooting
 * If encountering issues such as 'No space left on device' during the Docker build process, free up space using:
 +
 [source,bash]
 ----
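Note on the Makefile changes above (and the matching one in the next file): with `| jq .` dropped, the `validate` targets no longer fail on hosts without jq installed; the curl responses are simply printed as raw JSON. Readers who still want pretty-printed output can pipe manually, a minimal sketch assuming jq is available locally:

    # Optional: pretty-print the cluster info by hand (requires jq)
    curl -sX GET http://localhost:9000/cluster/info -H 'accept: application/json' | jq .
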
diff --git a/03_pinot-kafka-flink/Makefile b/03_pinot-kafka-flink/Makefile
index 3c03980..d9db547 100644
--- a/03_pinot-kafka-flink/Makefile
+++ b/03_pinot-kafka-flink/Makefile
@@ -66,12 +66,12 @@ tables:
 
 validate:
 	@echo "\n🍷 Getting cluster info..."
-	@curl -sX GET http://localhost:9000/cluster/info -H 'accept: application/json' | jq .
+	@curl -sX GET http://localhost:9000/cluster/info -H 'accept: application/json'
 	@echo "\n🍷 Getting Schemas..."
 	@SCHEMAS=$$(curl -sX 'GET' \
 		'http://localhost:9000/schemas' \
-		-H 'accept: application/json' | jq .); \
+		-H 'accept: application/json'); \
 	if echo "$$SCHEMAS" | grep -q "rated_movies"; then \
 		echo "Schema 'rated_movies' found."; \
 	else \
diff --git a/03_pinot-kafka-flink/README.adoc b/03_pinot-kafka-flink/README.adoc
index ebb13da..0801bee 100644
--- a/03_pinot-kafka-flink/README.adoc
+++ b/03_pinot-kafka-flink/README.adoc
@@ -99,6 +99,24 @@ FROM MovieRatings
 LIMIT 10;
 +
 [source,sql]
 ----
+CREATE TABLE RatedMoviesSink -- <1>
+(
+    movieId INT,
+    title STRING,
+    releaseYear INT,
+    actors ARRAY<STRING>,
+    rating DOUBLE,
+    ratingTime TIMESTAMP(3),
+    PRIMARY KEY (movieId) NOT ENFORCED -- Declare the PRIMARY KEY constraint
+) WITH (
+    'connector' = 'upsert-kafka', -- This enables updates and deletes
+    'topic' = 'rated_movies',
+    'properties.bootstrap.servers' = 'kafka:9092',
+    'key.format' = 'json', -- Key format is JSON, matching the value format
+    'value.format' = 'json' -- Values are serialized in JSON
+);
+
+INSERT INTO RatedMoviesSink -- <2>
 SELECT m.movieId,
        m.title,
        m.releaseYear,
@@ -110,6 +128,8 @@ FROM MovieRatings r
          Movies m ON r.movieId = m.movieId;
 ----
+<1> Defines a Kafka sink table
+<2> Writes the join result to the Kafka sink
 
 ==== Step 4: Query Processed Data in Pinot
 * *Description:* After processing the data with Apache Flink, go to the Apache Pinot query console to query your rated and processed data.
@@ -121,16 +141,16 @@ FROM MovieRatings r
 http://localhost:9000/#/query
 ----
 
-== Clean Up
-To stop and remove all services related to this part of the workshop, execute:
+==== Clean Up
+* To stop and remove all services related to this part of the workshop, execute:
 +
 [source,bash]
 ----
 make destroy
 ----
 
-== Troubleshooting
-If encountering any issues during the setup or execution, check the logs of each service:
+==== Troubleshooting
+* If encountering any issues during the setup or execution, check the logs of each service:
 +
 [source,bash]
 ----
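The upsert-kafka sink defined in the README diff above can be sanity-checked from the Kafka side once the INSERT job is running. A minimal sketch, assuming the broker runs as a Compose service named `kafka` (matching the `kafka:9092` bootstrap address in the table definition) and that the image ships the standard console tools:

    # Tail the sink topic; upsert-kafka emits one JSON record per key update
    docker compose exec kafka kafka-console-consumer \
      --bootstrap-server kafka:9092 \
      --topic rated_movies \
      --from-beginning
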
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..1fd69b7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,23 @@
+# Define the list of directories
+DIRS := 01_pinot 02_pinot-kafka 03_pinot-kafka-flink
+
+# Default target
+all: pull_images
+
+# Target to pull images in each directory
+pull_images:
+	@for dir in $(DIRS); do \
+		echo "Pulling Docker images in \033[1m$$dir\033[0m..."; \
+		(cd $$dir && docker compose pull); \
+		echo "Completed pulling images in \033[1m$$dir\033[0m."; \
+	done
+
+# Target to stop all containers in each directory
+stop_containers:
+	@for dir in $(DIRS); do \
+		echo "Stopping all containers in \033[1m$$dir\033[0m..."; \
+		(cd $$dir && docker compose down -v); \
+		echo "All containers in \033[1m$$dir\033[0m have been stopped and volumes removed."; \
+	done
+
+.PHONY: all pull_images stop_containers
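A quick usage sketch for the new top-level Makefile (run from the repository root; `all` is the default goal, so a bare `make` behaves like `make pull_images`):

    make                   # same as `make pull_images`
    make pull_images       # pre-pull Docker images for all three workshop parts
    make stop_containers   # docker compose down -v in each part, removing volumes
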
diff --git a/README.adoc b/README.adoc
index bb292e0..862ff19 100644
--- a/README.adoc
+++ b/README.adoc
@@ -2,7 +2,7 @@ Viktor Gamov
 v1.0, 2024-04-25
 :toc:
-:imagesdir: assets/images
+:imagesdir: images
 :homepage: http://dev.startree.ai
 
 == Abstract
@@ -118,18 +118,99 @@ Speakers: Viktor, Upkar
 
 === Equipment and Software Check
 * Ensure all participants have installed Docker Desktop and have the necessary system resources as outlined in the prerequisites.
 
-== Setup Instructions
+== Let's Get Going!
+=== Before the Workshop
 
-. *Clone the Repository:*
+To ensure you are fully prepared for the workshop, please follow these guidelines:
+
+* *Version Control:*
+** Check out the latest version of the workshop repository to access all necessary materials and scripts.
++
+[source,bash]
+----
+git clone https://github.com/gAmUssA/uncorking-analytics-with-pinot-kafka-flink.git
+cd uncorking-analytics-with-pinot-kafka-flink
+----
+
+* *Docker:*
+** Install Docker if it isn't already installed on your system. Download it from https://www.docker.com/products/docker-desktop.
+** Before the workshop begins, pull the necessary Docker images to ensure you have the latest versions:
++
+[source,bash]
+----
+make pull_images
+----
+
+* *Integrated Development Environment (IDE):*
+** Install Visual Studio Code (VSCode) to edit and view the workshop materials comfortably.
+Download VSCode from https://code.visualstudio.com/.
+** Add the AsciiDoc extension from the Visual Studio Code marketplace to enhance your experience with AsciiDoc-formatted documents.
+
+=== During the Workshop
+
+* *Validate Setup:*
+** Before diving into the workshop exercises, verify that all Docker containers needed for the workshop are running correctly:
++
+[source,bash]
+----
+docker ps
+----
+** This command confirms that there are no unforeseen issues with the Docker containers, ensuring smooth operation during the workshop.
+
+* *Using VSCode:*
+** Open the workshop directory in VSCode to access and edit files easily.
+Use the AsciiDoc extension to view the formatted documents and instructions:
++
+[source,bash]
+----
+code .
+----
+
+=== Troubleshooting Tips
+
+* *Docker Issues:*
+** If Docker containers fail to start or crash, use the following command to inspect the logs and identify potential issues:
++
+[source,bash]
+----
+docker logs <container_name>
+----
+** This can help in diagnosing problems with specific services.
+
+* *Network Issues:*
+** Ensure no applications are blocking the required ports. If ports are in use or blocked, reconfigure the services to use alternative ports or stop the conflicting applications.
+
+=== Clean Up Post-Workshop
+
+* *Removing Docker Containers:*
+** To clean up after the workshop, remove the Docker containers used during the session to free up resources:
++
+[source,bash]
+----
+make stop_containers
+----
+** Additionally, prune unused Docker images and volumes to recover disk space:
 +
 [source,bash]
 ----
-git clone https://github.com/gAmUssA/uncorking-analytics-with-pinot-kafka-flink && cd $_
+docker system prune -a
+docker volume prune
 ----
 
+These steps and tips are designed to prepare you thoroughly for the workshop and to help address common issues that might arise, ensuring a focused and productive learning environment.
+
 == Practice Part Overview
-The practical exercises of this workshop are divided into three distinct parts, each designed to give you hands-on experience with Apache Pinot's capabilities in different scenarios. Below are the details and objectives for each part:
+The practical exercises of this workshop are divided into three distinct parts, each designed to give you hands-on experience with Apache Pinot's capabilities in different scenarios.
+Below are the details and objectives for each part:
+
+image::figure1.png[]
 
 include::01_pinot/README.adoc[]
+image::figure2.png[]
+
 include::02_pinot-kafka/README.adoc[]
+
+image::figure3.png[]
+
+include::03_pinot-kafka-flink/README.adoc[]
\ No newline at end of file
diff --git a/images/figure1.png b/images/figure1.png
new file mode 100644
index 0000000..b1f3284
Binary files /dev/null and b/images/figure1.png differ
diff --git a/images/figure2.png b/images/figure2.png
new file mode 100644
index 0000000..d10b106
Binary files /dev/null and b/images/figure2.png differ
diff --git a/images/figure3.png b/images/figure3.png
new file mode 100644
index 0000000..7bbc3d7
Binary files /dev/null and b/images/figure3.png differ