diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index d1e3f58..0000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,244 +0,0 @@ -name: llm-app - build and public package -on: - push: - tags: - - 'v*.*.*' -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} -jobs: - Build_package: - name: Build package - strategy: - fail-fast: false - runs-on: ubuntu-22.04 - timeout-minutes: 30 - steps: - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Git checkout - uses: actions/checkout@v4 - - - name: Install poetry - uses: abatilo/actions-poetry@v3 - - - name: Build package - run: poetry build - - - name: Upload artifact - uses: actions/upload-artifact@v4 - with: - name: llm-app - path: ./dist/ - - - name: Upload artifact - uses: actions/upload-artifact@v4 - with: - name: CHANGELOG.md - path: CHANGELOG.md - - Verify_package: - needs: - - Build_package - name: Verify package - strategy: - matrix: - python-version: ["3.10", "3.11"] - fail-fast: false - runs-on: ubuntu-22.04 - timeout-minutes: 30 - steps: - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Git checkout - uses: actions/checkout@v4 - - - name: Install poetry - uses: abatilo/actions-poetry@v3 - - - name: create dir for wheels - run: | - mkdir wheels - - - uses: actions/download-artifact@v4 - with: - name: llm-app - path: ./wheels/ - - - name: Build ENV - run: | - cat < .env - APP_VARIANT=contextful - PATHWAY_REST_CONNECTOR_HOST=0.0.0.0 - PATHWAY_REST_CONNECTOR_PORT=8080 - OPENAI_API_KEY=${{ secrets.OPENAI_TOKEN }} - PATHWAY_CACHE_DIR=/tmp/cache - EOF - - - name: Install and verify ${{ matrix.os }} package - run: | - set -ex - ENV_NAME="testenv_llm_app" - rm -rf $ENV_NAME - python -m venv ${ENV_NAME} - source ${ENV_NAME}/bin/activate - pip install python-dotenv - pip install --prefer-binary 
wheels/*.whl - python ./run_examples.py contextful > /dev/null 2>&1 & - sleep 60 - curl -s --data '{"user": "user", "query": "How to connect to Kafka in Pathway?"}' http://localhost:8080/ - - Test_pypi: # test.pypi.org first - needs: - - Build_package - - Verify_package - name: Test pypi - strategy: - fail-fast: false - runs-on: ubuntu-22.04 - timeout-minutes: 15 - steps: - - name: Git checkout - uses: actions/checkout@v4 - - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: create dir for wheels - run: | - mkdir wheels - - - uses: actions/download-artifact@v4 - with: - name: llm-app - path: ./wheels/ - - - uses: actions/download-artifact@v4 - with: - name: CHANGELOG.md - path: . - - - name: Build ENV - run: | - cat < .env - APP_VARIANT=contextful - PATHWAY_REST_CONNECTOR_HOST=0.0.0.0 - PATHWAY_REST_CONNECTOR_PORT=8080 - OPENAI_API_KEY=${{ secrets.OPENAI_TOKEN }} - PATHWAY_CACHE_DIR=/tmp/cache - EOF - - # https://github.com/marketplace/actions/pypi-publish - # https://test.pypi.org/project/llm-app/ - - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.TEST_PYPI_TOKEN }} - packages-dir: './wheels/' - repository-url: https://test.pypi.org/legacy/ - - - name: Install and verify ${{ matrix.os }} package - run: | - set -ex - ENV_NAME="testenv_llm_app" - rm -rf $ENV_NAME - python -m venv ${ENV_NAME} - source ${ENV_NAME}/bin/activate - pip install python-dotenv - pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple llm-app - pip show llm-app - python ./run_examples.py contextful > /dev/null 2>&1 & - sleep 60 - curl -s --data '{"user": "user", "query": "How to connect to Kafka in Pathway?"}' http://localhost:8080/ - Publish: - needs: - - Build_package - - Verify_package - - Test_pypi - environment: PROD - name: Publish package - strategy: - # When true GitHub will cancel all in-progress and queued jobs in the 
matrix if any job in the matrix fails. - fail-fast: false - runs-on: ubuntu-22.04 - timeout-minutes: 15 - steps: - # Add wheelhouse - - name: create dir for wheels - run: | - mkdir wheels - - - uses: actions/download-artifact@v4 - with: - name: llm-app - path: ./wheels/ - - - uses: actions/download-artifact@v4 - with: - name: CHANGELOG.md - path: . - - - name: Save package to S3 - uses: prewk/s3-cp-action@v2 - with: - aws_access_key_id: ${{ secrets.ARTIFACT_AWS_ACCESS_KEY_ID }} - aws_secret_access_key: ${{ secrets.ARTIFACT_AWS_SECRET_ACCESS_KEY }} - dest: ${{ secrets.ARTIFACT_AWS_BUCKET }} - source: ./wheels/*.whl - - # https://github.com/marketplace/actions/create-release - - name: Create Release - uses: ncipollo/release-action@v1.13.0 - with: - draft: true - artifacts: "./wheels/*.whl" - artifactContentType: "raw" - allowUpdates: true - bodyFile: "CHANGELOG.md" - tag: ${{github.ref_name}} - commit: main - - # https://github.com/marketplace/actions/pypi-publish - # https://pypi.org/project/llm-app/ - - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_TOKEN }} - packages-dir: './wheels/' - - post-release-message: - runs-on: ubuntu-latest - needs: Publish - if: success() - steps: - - name: Post to a Slack channel - id: slack - uses: slackapi/slack-github-action@v1.23.0 - with: - # Slack channel id, channel name, or user id to post message. - # See also: https://api.slack.com/methods/chat.postMessage#channels - # You can pass in multiple channels to post to by providing a comma-delimited list of channel IDs. - channel-id: "{{ secrets.SLACK_DEV_CHANNEL_ID }}" - # For posting a simple plain text message - payload: | - { - "text": ":tada:\n Hey, it's Manul here. 
I've made a new release llm-app ${{ github.event.release.tag_name }} released: has been published:\n${{ github.event.release.html_url }} \nhttps://pypi.org/project/llm-app/ \n:tada:", - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": ":tada:\n Hey, it's Manul here. I've made a new release llm-app ${{ github.event.release.tag_name }} released: has been published:\n${{ github.event.release.html_url }} \nhttps://pypi.org/project/llm-app/ \n:tada:" - } - } - ] - } - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_CHANNEL_URL }} diff --git a/README.md b/README.md index 8445ee4..2fa3b45 100644 --- a/README.md +++ b/README.md @@ -26,10 +26,10 @@ The application templates provided in this repo scale up to **millions of pages | [`Question-Answering RAG App`](examples/pipelines/demo-question-answering/) | Basic end-to-end RAG app. A question-answering pipeline that uses the GPT model of choice to provide answers to queries to your documents (PDF, DOCX,...) on a live connected data source (files, Google Drive, Sharepoint,...). You can also try out a [demo REST endpoint](https://pathway.com/solutions/rag-pipelines#try-it-out). | | [`Live Document Indexing (Vector Store / Retriever)`](examples/pipelines/demo-document-indexing/) | A real-time document indexing pipeline for RAG that acts as a vector store service. It performs live indexing on your documents (PDF, DOCX,...) from a connected data source (files, Google Drive, Sharepoint,...). It can be used with any frontend, or integrated as a retriever backend for a [Langchain](https://pathway.com/developers/templates/langchain-integration) or [Llamaindex](https://pathway.com/developers/templates/llamaindex-pathway) application. You can also try out a [demo REST endpoint](https://pathway.com/solutions/ai-contract-management#try-it-out). 
| | [`Multimodal RAG pipeline with GPT4o`](examples/pipelines/gpt_4o_multimodal_rag/) | Multimodal RAG using GPT-4o in the parsing stage to index PDFs and other documents from a connected data source files, Google Drive, Sharepoint,...). It is perfect for extracting information from unstructured financial documents in your folders (including charts and tables), updating results as documents change or new ones arrive.| -| [`Adaptive RAG App`](examples/pipelines/adaptive-rag/) | A RAG application using Adaptive RAG, a technique developed by Pathway to reduce token cost in RAG up to 4x while maintaining accuracy. | -| [`Private RAG App with Mistral and Ollama`](examples/pipelines/private-rag/) | A fully private (local) version of the `demo-question-answering` RAG pipeline using Pathway, Mistral, and Ollama. | | [`Unstructured-to-SQL pipeline + SQL question-answering`](examples/pipelines/unstructured_to_sql_on_the_fly/) | A RAG example which connects to unstructured financial data sources (financial report PDFs), structures the data into SQL, and loads it into a PostgreSQL table. It also answers natural language user queries to these financial documents by translating them into SQL using an LLM and executing the query on the PostgreSQL table. | | [`Alerting when answers change on Google Drive`](examples/pipelines/drive_alert/) | Ask questions about your private data (docs), and tell the app to alert you whenever responses change. The app is always connected to your Google Docs folder and listening for changes. Whenever new relevant information is added to the data sources, the LLM decides if there is a substantial difference in response and notifies the user with a Slack message.| +| [`Adaptive RAG App`](examples/pipelines/adaptive-rag/) | A RAG application using Adaptive RAG, a technique developed by Pathway to reduce token cost in RAG up to 4x while maintaining accuracy. 
| +| [`Private RAG App with Mistral and Ollama`](examples/pipelines/private-rag/) | A fully private (local) version of the `demo-question-answering` RAG pipeline using Pathway, Mistral, and Ollama. | ## How do these LLM Apps work? diff --git a/examples/pipelines/alert/Dockerfile b/examples/pipelines/alert/Dockerfile deleted file mode 100644 index aed6703..0000000 --- a/examples/pipelines/alert/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM pathwaycom/pathway:latest -WORKDIR /app -COPY . . -EXPOSE 8080 - -CMD ["python", "app.py"] diff --git a/examples/pipelines/alert/README.md b/examples/pipelines/alert/README.md deleted file mode 100644 index c60866d..0000000 --- a/examples/pipelines/alert/README.md +++ /dev/null @@ -1,96 +0,0 @@ -

- - GCP Logo Deploy with GCP - | - - Render Logo Deploy with Render - -

- -# Real-time alerting based on local documents: End-to-end template - -This example implements a pipeline that answers questions based on documents in a given folder. Additionally, in your prompts you can ask to be notified of any changes - in such case an alert will be sent to a Slack channel. - -Upon starting, a REST API endpoint is opened by the app to serve queries about files inside -the input folder `data_dir`. - -We can create notifications by sending a query to API and stating we want to be notified of the changes. -One example would be `Tell me and alert about the start date of the campaign for Magic Cola` - -What happens next? - -Each query text is first turned into a vector using OpenAI embedding service, -then relevant documentation pages are found using a Nearest Neighbor index computed -for documents in the corpus. A prompt is built from the relevant documentations pages -and sent to the OpenAI GPT3.5 chat service for processing and answering. - -Once you run, Pathway looks for any changes in data sources and efficiently detects changes -to the relevant documents. When a change is detected, the LLM is asked to answer the query -again, and if the new answer is sufficiently different, an alert is created. - -## How to run the project - -### Setup Slack notifications: - -For this demo, Slack notifications are optional and notifications will be printed if no Slack API keys are provided. See: [Slack Apps](https://api.slack.com/apps) and [Getting a token](https://api.slack.com/tutorials/tracks/getting-a-token). -Your Slack application will need at least `chat:write.public` scope enabled. - -### Setup environment: -Set your env variables in the .env file placed in this directory. - -```bash -OPENAI_API_KEY=sk-... -SLACK_ALERT_CHANNEL_ID= -SLACK_ALERT_TOKEN= -PATHWAY_DATA_DIR= # If unset, defaults to ./data/live/. If running with Docker, when you change this variable you may need to change the volume mount. 
-PATHWAY_PERSISTENT_STORAGE= # Set this variable if you want to use caching -``` - -### Run with Docker - -To run jointly the Alert pipeline and a simple UI execute: - -```bash -docker compose up --build -``` - -Then, the UI will run at http://0.0.0.0:8501 by default. You can access it by following this URL in your web browser. - -The `docker-compose.yml` file declares a [volume bind mount](https://docs.docker.com/reference/cli/docker/container/run/#volume) that makes changes to files under `data/` made on your host computer visible inside the docker container. The files in `data/live` are indexed by the pipeline - you can paste new files there and they will impact the computations. - -### Run manually - -Alternatively, you can run each service separately. - -Make sure you have installed poetry dependencies. -```bash -poetry install --with examples -``` - -Then run: -```bash -poetry run python app.py -``` - -If all dependencies are managed manually rather than using poetry, you can alternatively use: -```bash -python app.py -``` - -To run the Streamlit UI, run: -```bash -streamlit run ui/server.py --server.port 8501 --server.address 0.0.0.0 -``` - -### Querying the pipeline - -To create alerts, you can call the REST API: - -```bash -curl --data '{ - "user": "user", - "query": "When does the magic cola campaign start? Alert me if the start date changes." -}' http://localhost:8080/ | jq -``` - -or access the Streamlit UI at `0.0.0.0:8501`. diff --git a/examples/pipelines/alert/__init__.py b/examples/pipelines/alert/__init__.py deleted file mode 100644 index 0565668..0000000 --- a/examples/pipelines/alert/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .app import run - -__all__ = ["run"] diff --git a/examples/pipelines/alert/app.py b/examples/pipelines/alert/app.py deleted file mode 100644 index 008800c..0000000 --- a/examples/pipelines/alert/app.py +++ /dev/null @@ -1,268 +0,0 @@ -""" -Microservice for a context-aware alerting ChatGPT assistant. 
- -This demo is very similar to `contextful` example with an additional real time alerting capability. -In the demo, alerts are sent to Slack (you need `slack_alert_channel_id` and `slack_alert_token`), -you can either put these env variables in .env file under llm-app directory, -or create env variables in the terminal (ie. export in bash). - -Upon starting, a REST API endpoint is opened by the app to serve queries about files inside -the input folder `data_dir`. - -We can create notifications by sending a query to API and stating we want to be notified of the changes. -One example would be `Tell me and alert about the start date of the campaign for Magic Cola` - -What happens next? - -Each query text is first turned into a vector using OpenAI embedding service, -then relevant documentation pages are found using a Nearest Neighbor index computed -for documents in the corpus. A prompt is built from the relevant documentations pages -and sent to the OpenAI GPT3.5 chat service for processing and answering. - -Once you run, Pathway looks for any changes in data sources and efficiently detects changes -to the relevant documents. When a change is detected, the LLM is asked to answer the query -again, and if the new answer is sufficiently different, an alert is created. - -Please check the README.md in this directory for how-to-run instructions. -""" - -import asyncio -import os - -import dotenv -import pathway as pw -from pathway.stdlib.ml.index import KNNIndex -from pathway.xpacks.llm.embedders import OpenAIEmbedder -from pathway.xpacks.llm.llms import OpenAIChat, prompt_chat_single_qa - -# To use advanced features with Pathway Scale, get your free license key from -# https://pathway.com/features and paste it below. -# To use Pathway Community, comment out the line below. 
-pw.set_license_key("demo-license-key-with-telemetry") - -dotenv.load_dotenv() - - -class DocumentInputSchema(pw.Schema): - doc: str - - -class QueryInputSchema(pw.Schema): - query: str - user: str - - -# Helper Functions -@pw.udf -def build_prompt(documents, query): - docs_str = "\n".join( - [f"Doc-({idx}) -> {doc}" for idx, doc in enumerate(documents[::-1])] - ) - prompt = f"""Given a set of documents, answer user query. If answer is not in docs, say it can't be inferred. - -Docs: {docs_str} -Query: '{query}' -Final Response:""" - return prompt - - -@pw.udf -def build_prompt_check_for_alert_request_and_extract_query(query: str) -> str: - prompt = f"""Evaluate the user's query and identify if there is a request for notifications on answer alterations: - User Query: '{query}' - - Respond with 'Yes' if there is a request for alerts, and 'No' if not, - followed by the query without the alerting request part. - - Examples: - "Tell me about windows in Pathway" => "No. Tell me about windows in Pathway" - "Tell me and alert about windows in Pathway" => "Yes. Tell me about windows in Pathway" - """ - return prompt - - -@pw.udf -def split_answer(answer: str) -> tuple[bool, str]: - alert_enabled = "yes" in answer[:3].lower() - true_query = answer[3:].strip(' ."') - return alert_enabled, true_query - - -def build_prompt_compare_answers(new: str, old: str) -> str: - prompt = f""" - Are the two following responses for a specific question deviating? - Answer with `Yes` or `No`. 
- - Example: - First response: "Joe is going to lead the session" - Second response: "Alice will be lead of this session" - Answer: Yes - - Example: - First response: "This car goes for around $45000" - Second response: "Price of this SUV is $45.000 including tax" - Answer: No - - Example: - First response: "New show will air in 2024" - Second response: "Premier of the new show is set to be in March 2024" - Answer: Yes - - First response: "{old}" - Second response: "{new}" - Answer: - """ - return prompt - - -def make_query_id(user, query) -> str: - return str(hash(query + user)) # + str(time.time()) - - -@pw.udf -def construct_notification_message(query: str, response: str) -> str: - return f'New response for question "{query}":\n{response}' - - -@pw.udf -def construct_message(response, alert_flag, metainfo=None): - if alert_flag: - if metainfo: - response += "\n" + str(metainfo) - return response + "\n\nšŸ”” Activated" - return response - - -def decision_to_bool(decision: str) -> bool: - return "yes" in decision.lower() - - -def run( - *, - data_dir: str = os.environ.get("PATHWAY_DATA_DIR", "./data/live/"), - api_key: str = os.environ.get("OPENAI_API_KEY", ""), - host: str = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "0.0.0.0"), - port: int = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", "8080")), - embedder_locator: str = "text-embedding-ada-002", - embedding_dimension: int = 1536, - model_locator: str = "gpt-3.5-turbo", - max_tokens: int = 400, - temperature: float = 0.0, - slack_alert_channel_id=os.environ.get("SLACK_ALERT_CHANNEL_ID", ""), - slack_alert_token=os.environ.get("SLACK_ALERT_TOKEN", ""), - **kwargs, -): - # Part I: Build index - embedder = OpenAIEmbedder( - api_key=api_key, - model=embedder_locator, - retry_strategy=pw.udfs.FixedDelayRetryStrategy(), - cache_strategy=pw.udfs.DefaultCache(), - ) - - documents = pw.io.jsonlines.read( - data_dir, - schema=DocumentInputSchema, - mode="streaming_with_deletions", - autocommit_duration_ms=50, - ) 
- - enriched_documents = documents + documents.select(data=embedder(pw.this.doc)) - - index = KNNIndex( - enriched_documents.data, enriched_documents, n_dimensions=embedding_dimension - ) - - # Part II: receive queries, detect intent and prepare cleaned query - - query, response_writer = pw.io.http.rest_connector( - host=host, - port=port, - schema=QueryInputSchema, - autocommit_duration_ms=50, - delete_completed_queries=False, - ) - - model = OpenAIChat( - api_key=api_key, - model=model_locator, - temperature=temperature, - max_tokens=max_tokens, - retry_strategy=pw.udfs.FixedDelayRetryStrategy(), - cache_strategy=pw.udfs.DefaultCache(), - ) - - query += query.select( - prompt=build_prompt_check_for_alert_request_and_extract_query(query.query) - ) - query += query.select( - tupled=split_answer(model(prompt_chat_single_qa(pw.this.prompt))), - ) - query = query.select( - pw.this.user, - alert_enabled=pw.this.tupled[0], - query=pw.this.tupled[1], - ) - - query += query.select( - data=embedder(pw.this.query), - query_id=pw.apply(make_query_id, pw.this.user, pw.this.query), - ) - - # Part III: respond to queries - - query_context = query + index.get_nearest_items(query.data, k=3).select( - documents_list=pw.this.doc - ).with_universe_of(query) - - prompt = query_context.select( - pw.this.query_id, - pw.this.query, - pw.this.alert_enabled, - prompt=build_prompt(pw.this.documents_list, pw.this.query), - ) - - responses = prompt.select( - pw.this.query_id, - pw.this.query, - pw.this.alert_enabled, - response=model(prompt_chat_single_qa(pw.this.prompt)), - ) - - output = responses.select( - result=construct_message(pw.this.response, pw.this.alert_enabled) - ) - - response_writer(output) - - # Part IV: send alerts about responses which changed significantly. 
- - responses = responses.filter(pw.this.alert_enabled) - - def acceptor(new: str, old: str) -> bool: - if new == old: - return False - - # TODO: clean after udfs can be used as common functions - prompt = [dict(role="system", content=build_prompt_compare_answers(new, old))] - decision = asyncio.run(model.__wrapped__(prompt, max_tokens=20)) - return decision_to_bool(decision) - - deduplicated_responses = pw.stateful.deduplicate( - responses, - col=responses.response, - acceptor=acceptor, - instance=responses.query_id, - ) - - alerts = deduplicated_responses.select( - message=construct_notification_message(pw.this.query, pw.this.response) - ) - - pw.io.slack.send_alerts(alerts.message, slack_alert_channel_id, slack_alert_token) - - pw.run(monitoring_level=pw.MonitoringLevel.NONE) - - -if __name__ == "__main__": - run() diff --git a/examples/pipelines/alert/data/live/documents.jsonl b/examples/pipelines/alert/data/live/documents.jsonl deleted file mode 100644 index 2f7a5c9..0000000 --- a/examples/pipelines/alert/data/live/documents.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"doc": "We will launch Logitech campaign in July 2023"} -{"doc": "Ohio store opening is delayed until further notice."} -{"doc": "Campaign for Magic Cola is going to start in November 2023."} diff --git a/examples/pipelines/alert/data/staging/documents_extra.jsonl b/examples/pipelines/alert/data/staging/documents_extra.jsonl deleted file mode 100644 index 8bf5f06..0000000 --- a/examples/pipelines/alert/data/staging/documents_extra.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"doc": "Country director had discussion with local managers and they agreed to push start of campaign of Magic Cola to January 1st, 2024. 
Please plan accordingly."} \ No newline at end of file diff --git a/examples/pipelines/alert/docker-compose.yml b/examples/pipelines/alert/docker-compose.yml deleted file mode 100644 index 20a3924..0000000 --- a/examples/pipelines/alert/docker-compose.yml +++ /dev/null @@ -1,21 +0,0 @@ -version: "3.8" -services: - pathway: - build: - context: . - ports: - - "8080:8080" - environment: - OPENAI_API_KEY: - PATHWAY_PERSISTENT_STORAGE: - volumes: - - "./data:/app/data" - streamlit_ui: - depends_on: - - pathway - build: - context: ./ui - ports: - - "8501:8501" - environment: - PATHWAY_REST_CONNECTOR_HOST: "pathway" diff --git a/examples/pipelines/alert/ui/Dockerfile b/examples/pipelines/alert/ui/Dockerfile deleted file mode 100644 index 78e2121..0000000 --- a/examples/pipelines/alert/ui/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM python:3.11 - -WORKDIR /app - -RUN pip install streamlit python-dotenv - -COPY . . - -EXPOSE 8501 - -CMD ["streamlit", "run", "server.py", "--server.port", "8501", "--server.address", "0.0.0.0"] diff --git a/examples/pipelines/alert/ui/server.py b/examples/pipelines/alert/ui/server.py deleted file mode 100644 index a8160fb..0000000 --- a/examples/pipelines/alert/ui/server.py +++ /dev/null @@ -1,52 +0,0 @@ -import os - -import requests -import streamlit as st -from dotenv import load_dotenv - -with st.sidebar: - st.markdown( - "[View the source code on GitHub](https://github.com/pathwaycom/llm-app)" - ) - -# Load environment variables -load_dotenv() -api_host = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "127.0.0.1") -api_port = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", 8080)) - - -# Streamlit UI elements -st.title("LLM App") - - -# Initialize chat history -if "messages" not in st.session_state: - st.session_state.messages = [] - -# Display chat messages from history on app rerun -for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - - -# React to user input -if prompt 
:= st.chat_input("How can I help you today?"): - # Display user message in chat message container - with st.chat_message("user"): - st.markdown(prompt) - - # Add user message to chat history - st.session_state.messages.append({"role": "user", "content": prompt}) - - url = f"http://{api_host}:{api_port}/" - data = {"query": prompt, "user": "user"} - - response = requests.post(url, json=data) - - if response.status_code == 200: - response = response.json() - with st.chat_message("assistant"): - st.markdown(response) - st.session_state.messages.append({"role": "assistant", "content": response}) - else: - st.error(f"Failed to send data. Status code: {response.status_code}") diff --git a/examples/pipelines/contextful/Dockerfile b/examples/pipelines/contextful/Dockerfile deleted file mode 100644 index aed6703..0000000 --- a/examples/pipelines/contextful/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM pathwaycom/pathway:latest -WORKDIR /app -COPY . . -EXPOSE 8080 - -CMD ["python", "app.py"] diff --git a/examples/pipelines/contextful/README.md b/examples/pipelines/contextful/README.md deleted file mode 100644 index f8b8113..0000000 --- a/examples/pipelines/contextful/README.md +++ /dev/null @@ -1,77 +0,0 @@ -

- - GCP Logo Deploy with GCP - | - - Render Logo Deploy with Render - -

- -# RAG pipeline with up-to-date knowledge: get answers based on documents in local folder - -This example implements a simple pipeline that answers questions based on documents in a given folder. - -Each query text is first turned into a vector using OpenAI embedding service, -then relevant documentation pages are found using a Nearest Neighbor index computed -for documents in the corpus. A prompt is built from the relevant documentation pages -and sent to the OpenAI chat service for processing. - -## How to run the project - -### Setup environment: -Set your env variables in the .env file placed in this directory. - -```bash -OPENAI_API_KEY=sk-... -PATHWAY_DATA_DIR= # If unset, defaults to ./data/. If running with Docker, when you change this variable you may need to change the volume mount. -PATHWAY_PERSISTENT_STORAGE= # Set this variable if you want to use caching -``` - -### Run with Docker - -To run jointly the Alert pipeline and a simple UI execute: - -```bash -docker compose up --build -``` - -Then, the UI will run at http://0.0.0.0:8501 by default. You can access it by following this URL in your web browser. - -The `docker-compose.yml` file declares a [volume bind mount](https://docs.docker.com/reference/cli/docker/container/run/#volume) that makes changes to files under `data/` made on your host computer visible inside the docker container. The files in `data/live` are indexed by the pipeline - you can paste new files there and they will impact the computations. - -### Run manually - -Alternatively, you can run each service separately. - -Make sure you have installed poetry dependencies. 
-```bash -poetry install --with examples -``` - -Then run: -```bash -poetry run python app.py -``` - -If all dependencies are managed manually rather than using poetry, you can alternatively use: -```bash -python app.py -``` - -To run the Streamlit UI, run: -```bash -streamlit run ui/server.py --server.port 8501 --server.address 0.0.0.0 -``` - -### Querying the pipeline - -To query the pipeline, you can call the REST API: - -```bash -curl --data '{ - "user": "user", - "query": "How to connect to Kafka in Pathway?" -}' http://localhost:8080/ | jq -``` - -or access the Streamlit UI at `0.0.0.0:8501`. diff --git a/examples/pipelines/contextful/__init__.py b/examples/pipelines/contextful/__init__.py deleted file mode 100644 index 0565668..0000000 --- a/examples/pipelines/contextful/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .app import run - -__all__ = ["run"] diff --git a/examples/pipelines/contextful/app.py b/examples/pipelines/contextful/app.py deleted file mode 100644 index 7ed0eb3..0000000 --- a/examples/pipelines/contextful/app.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -Microservice for a context-aware ChatGPT assistant. - -The following program reads in a collection of documents, -embeds each document using the OpenAI document embedding model, -then builds an index for fast retrieval of documents relevant to a question, -effectively replacing a vector database. - -The program then starts a REST API endpoint serving queries about programming in Pathway. - -Each query text is first turned into a vector using OpenAI embedding service, -then relevant documentation pages are found using a Nearest Neighbor index computed -for documents in the corpus. A prompt is built from the relevant documentation pages -and sent to the OpenAI chat service for processing. - -Please check the README.md in this directory for how-to-run instructions. 
-""" - -import os - -import dotenv -import pathway as pw -from pathway.stdlib.ml.index import KNNIndex -from pathway.xpacks.llm.embedders import OpenAIEmbedder -from pathway.xpacks.llm.llms import OpenAIChat, prompt_chat_single_qa - -# To use advanced features with Pathway Scale, get your free license key from -# https://pathway.com/features and paste it below. -# To use Pathway Community, comment out the line below. -pw.set_license_key("demo-license-key-with-telemetry") - -dotenv.load_dotenv() - - -class DocumentInputSchema(pw.Schema): - doc: str - - -class QueryInputSchema(pw.Schema): - query: str - user: str - - -def run( - *, - data_dir: str = os.environ.get("PATHWAY_DATA_DIR", "./data/"), - api_key: str = os.environ.get("OPENAI_API_KEY", ""), - host: str = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "0.0.0.0"), - port: int = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", "8080")), - embedder_locator: str = "text-embedding-ada-002", - embedding_dimension: int = 1536, - model_locator: str = "gpt-3.5-turbo", - max_tokens: int = 60, - temperature: float = 0.0, - **kwargs, -): - embedder = OpenAIEmbedder( - api_key=api_key, - model=embedder_locator, - retry_strategy=pw.udfs.FixedDelayRetryStrategy(), - cache_strategy=pw.udfs.DefaultCache(), - ) - - documents = pw.io.jsonlines.read( - data_dir, - schema=DocumentInputSchema, - mode="streaming", - autocommit_duration_ms=50, - ) - - enriched_documents = documents + documents.select(vector=embedder(pw.this.doc)) - - index = KNNIndex( - enriched_documents.vector, enriched_documents, n_dimensions=embedding_dimension - ) - - query, response_writer = pw.io.http.rest_connector( - host=host, - port=port, - schema=QueryInputSchema, - autocommit_duration_ms=50, - delete_completed_queries=True, - ) - - query += query.select(vector=embedder(pw.this.query)) - - query_context = query + index.get_nearest_items( - query.vector, k=3, collapse_rows=True - ).select(documents_list=pw.this.doc) - - @pw.udf - def 
build_prompt(documents, query): - docs_str = "\n".join(documents) - prompt = f"Given the following documents : \n {docs_str} \nanswer this query: {query}" - return prompt - - prompt = query_context.select( - prompt=build_prompt(pw.this.documents_list, pw.this.query) - ) - - model = OpenAIChat( - api_key=api_key, - model=model_locator, - temperature=temperature, - max_tokens=max_tokens, - retry_strategy=pw.udfs.FixedDelayRetryStrategy(), - cache_strategy=pw.udfs.DefaultCache(), - ) - - responses = prompt.select( - query_id=pw.this.id, result=model(prompt_chat_single_qa(pw.this.prompt)) - ) - - response_writer(responses) - - pw.run() - - -if __name__ == "__main__": - run() diff --git a/examples/pipelines/contextful/data/pathway-docs.jsonl b/examples/pipelines/contextful/data/pathway-docs.jsonl deleted file mode 100644 index 929737d..0000000 --- a/examples/pipelines/contextful/data/pathway-docs.jsonl +++ /dev/null @@ -1,415 +0,0 @@ -{"doc": "---\ntitle: pathway.io.sqlite package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.sqlite package\nFunctions\npw.io.sqlite.read(path, table_name, schema, *, autocommit_duration_ms=1500, debug_data=None)\nReads a table from a rowid table in SQLite database.\n* Parameters\n * path (`PathLike` | `str`) \u2013 Path to the database file.\n * table_name (`str`) \u2013 Name of the table in the database to be read.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 The maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "Notes\nThe CSV files should follow a standard CSV settings: the separator is \u2018,\u2019, the\nquotechar is \u2018\u201d\u2019, and there is no escape.\npw.demo.replay_csv_with_time(path, *, schema, time_column, unit='s', autocommit_ms=100, speedup=1)\nReplay a static CSV files as a data stream while respecting the time between updated based on a timestamp columns.\nThe timestamps in the file should be ordered positive integers.\n* Parameters\n * path (`str`) \u2013 Path to the file to stream.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * time_column (`str`) \u2013 Column containing the timestamps.\n * unit (`str`) \u2013 Unit of the timestamps. Only \u2018s\u2019, \u2018ms\u2019, \u2018us\u2019, and \u2018ns\u2019 are supported. Defaults to \u2018s\u2019.\n * autocommit_duration_ms \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * speedup (`float`) \u2013 Produce stream speedup times faster than it would result from the time column.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "---\ntitle: Demo API\nsidebar: 'API'\nnavigation: true\n---\n# Demo API\nThe demo module allows you to create custom data streams from scratch or by utilizing a CSV file.\nThis feature empowers you to effectively test and debug your Pathway implementation using realtime data.\nPathway demo module\nTypical use:\n```python\nclass InputSchema(pw.Schema):\n name: str\n age: int\npw.demo.replay_csv(\"./input_stream.csv\", schema=InputSchema)\n```\n::\nResult\n```\n, 'age': }>\n```\n::\n::\nFunctions\npw.demo.generate_custom_stream(value_generators, *, schema, nb_rows=None, autocommit_duration_ms=1000, input_rate=1.0, persistent_id=None)\nGenerates a data stream.\nThe generator creates a table and periodically streams rows.\nIf a `nb_rows` value is provided, there are `nb_rows` row generated in total,\nelse the generator streams indefinitely.\nThe rows are generated iteratively and have an associated index x, starting from 0.\nThe values of each column are generated by their associated function in `value_generators`.\n* Parameters\n * value_generators (`dict`\\[`str`, `Any`\\]) \u2013 Dictionary mapping column names to functions that generate values for each column.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * nb_rows (`Optional`\\[`int`\\]) \u2013 The number of rows to generate. Defaults to None. If set to None, the generator\n generates streams indefinitely.\n * types \u2013 Dictionary containing the mapping between the columns and the data types (`pw.Type`) of the values of those columns. 
This parameter is optional, and if not provided the default type is `pw.Type.ANY`.\n * autocommit_duration_ms (`int`) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * input_rate (`float`) \u2013 The rate at which rows are generated per second. Defaults to 1.0.\n* Returns\n *Table* \u2013 The generated table.\nExample:\n"} -{"doc": "---\ntitle: Demo API\nsidebar: 'API'\nnavigation: true\n---\n# Demo API\nThe demo module allows you to create custom data streams from scratch or by utilizing a CSV file.\nThis feature empowers you to effectively test and debug your Pathway implementation using realtime data.\nPathway demo module\nTypical use:\n```python\nvalue_functions = {\n 'number': lambda x: x + 1,\n 'name': lambda x: f'Person {x}',\n 'age': lambda x: 20 + x,\n}\nclass InputSchema(pw.Schema):\n number: int\n name: str\n age: int\npw.demo.generate_custom_stream(value_functions, schema=InputSchema, nb_rows=10)\n```\n::\nResult\n```\n, 'name': , 'age': }>\n```\n::\n::\nIn the above example, a data stream is generated with 10 rows, where each row has columns \u2018number\u2019, \u2018name\u2019, and \u2018age\u2019.\nThe \u2018number\u2019 column contains values incremented by 1 from 1 to 10, the \u2018name\u2019 column contains \u2018Person\u2019\nfollowed by the respective row index, and the \u2018age\u2019 column contains values starting from 20 incremented by\nthe row index.\npw.demo.noisy_linear_stream(nb_rows=10, input_rate=1.0)\nGenerates an artificial data stream for the linear regression tutorial.\n* Parameters\n * nb_rows (*int, optional*) \u2013 The number of rows to generate in the data stream. Defaults to 10.\n * input_rate (*float, optional*) \u2013 The rate at which rows are generated per second. 
Defaults to 1.0.\n* Returns\n *pw.Table* \u2013 A table containing the generated data stream.\nExample:\n```python\ntable = pw.demo.noisy_linear_stream(nb_rows=100, input_rate=2.0)\n```\nIn the above example, an artificial data stream is generated with 100 rows. Each row has two columns, \u2018x\u2019 and \u2018y\u2019.\nThe \u2018x\u2019 values range from 0 to 99, and the \u2018y\u2019 values are equal to \u2018x\u2019 plus some random noise.\npw.demo.range_stream(nb_rows=30, offset=0, input_rate=1.0)\nGenerates a simple artificial data stream, used to compute the sum in our examples.\n* Parameters\n * nb_rows (*int, optional*) \u2013 The number of rows to generate in the data stream. Defaults to 30.\n * offset (*int, optional*) \u2013 The offset value added to the generated \u2018value\u2019 column. Defaults to 0.\n * input_rate (*float, optional*) \u2013 The rate at which rows are generated per second. Defaults to 1.0.\n* Returns\n *pw.Table* \u2013 a table containing the generated data stream.\nExample:\n```python\ntable = pw.demo.range_stream(nb_rows=50, offset=10, input_rate=2.5)\n```\nIn the above example, an artificial data stream is generated with a single column \u2018value\u2019 and 50 rows.\nThe \u2018value\u2019 column contains values ranging from \u2018offset\u2019 (10 in this case) to \u2018nb_rows\u2019 + \u2018offset\u2019 (60).\npw.demo.replay_csv(path, *, schema, input_rate=1.0)\nReplay a static CSV files as a data stream.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to the file to stream.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * autocommit_duration_ms \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * input_rate (*float, optional*) \u2013 The rate at which rows are read per second. 
Defaults to 1.0.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "---\ntitle: pathway.stdlib.statistical package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.statistical package\nFunctions\npw.statistical.interpolate(self, timestamp, *values, mode=InterpolateMode.LINEAR)\nInterpolates missing values in a column using the previous and next values based on a timestamps column.\n* Parameters\n * timestamp (*ColumnReference*) \u2013 Reference to the column containing timestamps.\n * \\*values (*ColumnReference*) \u2013 References to the columns containing values to be interpolated.\n * mode (*InterpolateMode, optional*) \u2013 The interpolation mode. Currently, only InterpolateMode.LINEAR is supported. Default is InterpolateMode.LINEAR.\n* Returns\n *Table* \u2013 A new table with the interpolated values.\n* Raises\n ValueError \u2013 If the columns are not ColumnReference or if the interpolation mode is not supported.\nNOTE: * The interpolation is performed based on linear interpolation between the previous and next values.\n* If a value is missing at the beginning or end of the column, no interpolation is performed.\nExample:\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\ntimestamp | values_a | values_b\n1 | 1 | 10\n2 | |\n3 | 3 |\n4 | |\n5 | |\n6 | 6 | 60\n''')\ntable = table.interpolate(pw.this.timestamp, pw.this.values_a, pw.this.values_b)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ntimestamp | values_a | values_b\n1 | 1 | 10\n2 | 2.0 | 20.0\n3 | 3 | 30.0\n4 | 4.0 | 40.0\n5 | 5.0 | 50.0\n6 | 6 | 60\n```\n::\n::\n"} -{"doc": "pathway.stdlib.graphs.louvain_communities.impl module\nFunctions\npw.graphs.louvain_communities.impl.exact_modularity(G, C, round_digits=16)\nThis function computes modularity of a given weighted graph G with\nrespect to clustering C.\nThis implementation is meant to be used for testing / development,\nas computing exact value requires us to know the exact 
sum of the edge weights,\nwhich creates long dependency chains, and may be slow.\nThis implementation rounds the modularity to round_digits decimal places\n(default is 16), for result res it returns round(res, ndigits = round_digits)\n"} -{"doc": "pathway.stdlib.ml.classifiers.test_lsh module\npw.ml.classifiers.test_lsh.test_bucketer_cosine()\nVerifies that L buckets were indeed created\npw.ml.classifiers.test_lsh.test_bucketer_euclidean()\nVerifies that L buckets were indeed created\npw.ml.classifiers.test_lsh.test_lsh()\nVerifies that close points are mapped together and distant ones - apart.\npw.ml.classifiers.test_lsh.test_lsh_bucketing()\nVerifies that bucketing is properly indexed.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.classifiers package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.classifiers package\nFunctions\npw.ml.classifiers.knn_lsh_classifier_train(data, L, type='euclidean', kwargs)\nBuild the LSH index over data.\nL the number of repetitions of the LSH scheme.\nReturns a LSH projector of type (queries: Table, k:Any) -> Table\npw.ml.classifiers.knn_lsh_classify(knn_model, data_labels, queries, k)\nClassify the queries.\nUse the knn_model to extract the k closest datapoints.\nThe queries are then labeled using a majority vote between the labels\nof the retrieved datapoints, using the labels provided in data_labels.\npw.ml.classifiers.knn_lsh_euclidean_classifier_train(data, d, M, L, A)\nBuild the LSH index over data using the Euclidean distances.\nd is the dimension of the data, L the number of repetition of the LSH scheme,\nM and A are specific to LSH with Euclidean distance, M is the number of random projections\ndone to create each bucket and A is the width of each bucket on each projection.\npw.ml.classifiers.knn_lsh_generic_classifier_train(data, lsh_projection, distance_function, L)\nBuild the LSH index over data using the a generic lsh_projector and its associated distance.\nL the number of repetitions of the LSH 
scheme.\nReturns a LSH projector of type (queries: Table, k:Any) -> Table\npw.ml.classifiers.knn_lsh_train(data, L, type='euclidean', kwargs)\nBuild the LSH index over data.\nL the number of repetitions of the LSH scheme.\nReturns a LSH projector of type (queries: Table, k:Any) -> Table\n"} -{"doc": "---\ntitle: pathway.io.fs package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.fs package\nFunctions\npw.io.fs.read(path, format, *, schema=None, mode='streaming', csv_settings=None, json_field_paths=None, object_pattern='*', with_metadata=False, persistent_id=None, autocommit_duration_ms=1500, debug_data=None, value_columns=None, primary_key=None, types=None, default_values=None)\nReads a table from one or several files with the specified format.\nIn case the folder is passed to the engine, the order in which files from the\ndirectory are processed is determined according to the modification time of files\nwithin this folder: they will be processed by ascending order of the modification time.\nIn case the format is \u201cplaintext\u201d, the table will consist of a single column\n`data` with each cell containing a single line from the file.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to the file or to the folder with files.\n * format (`str`) \u2013 Format of data to be read. Currently \u201ccsv\u201d, \u201cjson\u201d, \u201cplaintext\u201d, \u201cplaintext_by_file\u201d and \u201cbinary\u201d formats are supported. The difference between \u201cplaintext\u201d and \u201cplaintext_by_file\u201d is how the input is tokenized: if the \u201cplaintext\u201d option is chosen, it\u2019s split by the newlines. Otherwise, the files are split in full and one row will correspond to one file. 
In case the \u201cbinary\u201d format is specified, the data is read as raw bytes without UTF-8 parsing.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 denotes how the engine polls the new data from the source. Currently \u201cstreaming\u201d, \u201cstatic\u201d, and \u201cstreaming_with_deletions\u201d are supported. If set to \u201cstreaming\u201d the engine will wait for the new input files in the directory. On the other hand, \u201cstreaming_with_deletions\u201d mode also tracks file deletions and modifications and reflects them in the state. For example, if a file was deleted, \u201cstreaming_with_deletions\u201dmode will also remove rows obtained by reading this file from the table. Finally, the \u201cstatic\u201d mode will only consider the available data and ingest all of it in one commit. The default value is \u201cstreaming\u201d.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser. This parameter is used only in case\n the specified format is \u201ccsv\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is \u201cjson\u201d, this field allows to map field names\n into path in the read json object. For the field which require such mapping,\n it should be given in the format `: `,\n where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * object_pattern (`str`) \u2013 Unix shell style pattern for filtering only certain files in the directory. Ignored in case a path to a single file is specified.\n * with_metadata (`bool`) \u2013 When set to true, the connector will add an additional column named `_metadata` to the table. This column will be a JSON field that will contain two optional fields - `created_at` and `modified_at`. These fields will have integral UNIX timestamps for the creation and modification time respectively. 
Additionally, the column will also have an optional field named `owner` that will contain the name of the file owner (applicable only for Un). Finally, the column will also contain a field named `path` that will show the full path to the file from where a row was filled.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data (`Any`) \u2013 Static data replacing original one when debug mode is active.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Names of the columns to be extracted from the files. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. Supported in \u201ccsv\u201d and \u201cjson\u201d formats.\n \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entriest value of the column must be specified explicitly,\n otherwise there will be no default value. 
\\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nExample:\nConsider you want to read a dataset, stored in the filesystem in a standard CSV\nformat. The dataset contains data about pets and their owners.\nFor the sake of demonstration, you can prepare a small dataset by creating a CSV file\nvia a unix command line tool:\n```bash\nprintf \"id,owner,pet\\n1,Alice,dog\\n2,Bob,dog\\n3,Alice,cat\\n4,Bob,dog\" > dataset.csv\n```\nIn order to read it into Pathway\u2019s table, you can first do the import and then\nuse the `pw.io.fs.read` method:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.fs.read(\"dataset.csv\", format=\"csv\", schema=InputSchema)\n```\nThen, you can output the table in order to check the correctness of the read:\nCode\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice dog\n Bob dog\nAlice cat\n Bob dog\n```\n::\n::\nSimilarly, we can do the same for JSON format.\nFirst, we prepare a dataset:\n```bash\nprintf \"{\\\"id\\\":1,\\\"owner\\\":\\\"Alice\\\",\\\"pet\\\":\\\"dog\\\"}\n{\\\"id\\\":2,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"dog\\\"}\n{\\\"id\\\":3,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"cat\\\"}\n{\\\"id\\\":4,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"cat\\\"}\" > dataset.jsonlines\n```\nAnd then, we use the method with the \u201cjson\u201d format:\n```python\nt = pw.io.fs.read(\"dataset.jsonlines\", format=\"json\", schema=InputSchema)\n```\nNow let\u2019s try something different. Consider you have site access logs stored in a\nseparate folder in several files. 
For the sake of simplicity, a log entry contains\nan access ID, an IP address and the login of the user.\nA dataset, corresponding to the format described above can be generated, thanks to the\nfollowing set of unix commands:\n```bash\nmkdir logs\nprintf \"id,ip,login\\n1,127.0.0.1,alice\\n2,8.8.8.8,alice\" > logs/part_1.csv\nprintf \"id,ip,login\\n3,8.8.8.8,bob\\n4,127.0.0.1,alice\" > logs/part_2.csv\n```\nNow, let\u2019s see how you can use the connector in order to read the content of this\ndirectory into a table:\n```python\nclass InputSchema(pw.Schema):\n ip: str\n login: str\nt = pw.io.fs.read(\"logs/\", format=\"csv\", schema=InputSchema)\n```\nThe only difference is that you specified the name of the directory instead of the\nfile name, as opposed to what you had done in the previous example. It\u2019s that simple!\nAlternatively, we can do the same for the \u201cjson\u201d variant:\nThe dataset creation would look as follows:\n```bash\nmkdir logs\nprintf \"{\\\"id\\\":1,\\\"ip\\\":\\\"127.0.0.1\\\",\\\"login\\\":\\\"alice\\\"}\n{\\\"id\\\":2,\\\"ip\\\":\\\"8.8.8.8\\\",\\\"login\\\":\\\"alice\\\"}\" > logs/part_1.jsonlines\nprintf \"{\\\"id\\\":3,\\\"ip\\\":\\\"8.8.8.8\\\",\\\"login\\\":\\\"bob\\\"}\n{\\\"id\\\":4,\\\"ip\\\":\\\"127.0.0.1\\\",\\\"login\\\":\\\"alice\\\"}\" > logs/part_2.jsonlines\n```\nWhile reading the data from logs folder can be expressed as:\n```python\nt = pw.io.fs.read(\"logs/\", format=\"json\", schema=InputSchema, mode=\"static\")\n```\nBut what if you are working with a real-time system, which generates logs all the time.\nThe logs are being written and after a while they get into the log directory (this is\nalso called \u201clogs rotation\u201d). Now, consider that there is a need to fetch the new files\nfrom this logs directory all the time. Would Pathway handle that? Sure!\nThe only difference would be in the usage of `mode` field. 
So the code\nsnippet will look as follows:\n```python\nt = pw.io.fs.read(\"logs/\", format=\"csv\", schema=InputSchema, mode=\"streaming\")\n```\nOr, for the \u201cjson\u201d format case:\n```python\nt = pw.io.fs.read(\"logs/\", format=\"json\", schema=InputSchema, mode=\"streaming\")\n```\nWith this method, you obtain a table updated dynamically. The changes in the logs would incur\nchanges in the Business-Intelligence \u2018BI\u2019-ready data, namely, in the tables you would like to output. To see\nhow these changes are reported by Pathway, have a look at the\n\u201cStreams of Updates and Snapshots\u201d\narticle.\nFinally, a simple example for the plaintext format would look as follows:\n```python\nt = pw.io.fs.read(\"raw_dataset/lines.txt\", format=\"plaintext\")\n```\npw.io.fs.write(table, filename, format)\nWrites `table`\u2019s stream of updates to a file in the given format.\n* Parameters\n * table (`Table`) \u2013 Table to be written.\n * filename (`str` | `PathLike`) \u2013 Path to the target output file.\n * format (`str`) \u2013 Format to use for data output. Currently, there are two supported\n formats: \u201cjson\u201d and \u201ccsv\u201d.\n* Returns\n None\nExample:\nIn this simple example you can see how table output works.\nFirst, import Pathway and create a table:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice dog \\n 2 9 Bob cat \\n 3 8 Alice cat\")\n```\nConsider you would want to output the stream of changes of this table in csv format.\nIn order to do that you simply do:\n```python\npw.io.fs.write(t, \"table.csv\", format=\"csv\")\n```\nNow, let\u2019s see what you have on the output:\n```bash\ncat table.csv\n```\n```csv\nage,owner,pet,time,diff\n10,\"Alice\",\"dog\",0,1\n9,\"Bob\",\"cat\",0,1\n8,\"Alice\",\"cat\",0,1\n```\nThe first three columns clearly represent the data columns you have. The column time\nrepresents the number of operations minibatch, in which each of the rows was read. 
In\nthis example, since the data is static: you have 0. The diff is another\nelement of this stream of updates. In this context, it is 1 because all three rows were read from\nthe input. All in all, the extra information in `time` and `diff` columns - in this case -\nshows us that in the initial minibatch (`time = 0`), you have read three rows and all of\nthem were added to the collection (`diff = 1`).\nAlternatively, this data can be written in JSON format:\n```python\npw.io.fs.write(t, \"table.jsonlines\", format=\"json\")\n```\nThen, we can also check the output file by executing the command:\n```bash\ncat table.jsonlines\n```\n```json\n{\"age\":10,\"owner\":\"Alice\",\"pet\":\"dog\",\"diff\":1,\"time\":0}\n{\"age\":9,\"owner\":\"Bob\",\"pet\":\"cat\",\"diff\":1,\"time\":0}\n{\"age\":8,\"owner\":\"Alice\",\"pet\":\"cat\",\"diff\":1,\"time\":0}\n```\nAs one can easily see, the values remain the same, while the format has changed to a plain JSON.\n"} -{"doc": "Subpackages\n* pathway.stdlib.ml.classifiers package\n * `knn_lsh_classifier_train()`\n * `knn_lsh_classify()`\n * `knn_lsh_euclidean_classifier_train()`\n * `knn_lsh_generic_classifier_train()`\n * `knn_lsh_train()`\n * Submodules\n * pathway.stdlib.ml.classifiers.test_lsh module\n * `test_bucketer_cosine()`\n * `test_bucketer_euclidean()`\n * `test_lsh()`\n * `test_lsh_bucketing()`\n* pathway.stdlib.ml.datasets package\n * Subpackages\n * pathway.stdlib.ml.datasets.classification package\n* pathway.stdlib.ml.smart_table_ops package\n * `Edge`\n * `Feature`\n * `FuzzyJoinFeatureGeneration`\n * `FuzzyJoinFeatureGeneration.as_integer_ratio()`\n * `FuzzyJoinFeatureGeneration.bit_count()`\n * `FuzzyJoinFeatureGeneration.bit_length()`\n * `FuzzyJoinFeatureGeneration.conjugate()`\n * `FuzzyJoinFeatureGeneration.denominator`\n * `FuzzyJoinFeatureGeneration.from_bytes()`\n * `FuzzyJoinFeatureGeneration.imag`\n * `FuzzyJoinFeatureGeneration.numerator`\n * `FuzzyJoinFeatureGeneration.real`\n * 
`FuzzyJoinFeatureGeneration.to_bytes()`\n * `FuzzyJoinNormalization`\n * `FuzzyJoinNormalization.as_integer_ratio()`\n * `FuzzyJoinNormalization.bit_count()`\n * `FuzzyJoinNormalization.bit_length()`\n * `FuzzyJoinNormalization.conjugate()`\n * `FuzzyJoinNormalization.denominator`\n * `FuzzyJoinNormalization.from_bytes()`\n * `FuzzyJoinNormalization.imag`\n * `FuzzyJoinNormalization.numerator`\n * `FuzzyJoinNormalization.real`\n * `FuzzyJoinNormalization.to_bytes()`\n * `JoinResult`\n * `Node`\n"} -{"doc": "pathway.stdlib.ml.index module\nclass pw.ml.index.KNNIndex(data_embedding, data, n_dimensions, n_or=20, n_and=10, bucket_length=10.0, distance_type='euclidean')\nA K-Nearest Neighbors (KNN) index implementation using the Locality-Sensitive Hashing (LSH)\nalgorithm within Pathway. This index is designed to efficiently find the\nnearest neighbors of a given query embedding within a dataset.\n* Parameters\n * data_embedding (*pw.ColumnExpression*) \u2013 The column expression representing embeddings in the data.\n * data (*pw.Table*) \u2013 The table containing the data to be indexed.\n * n_dimensions (*int*) \u2013 number of dimensions in the data\n * n_or (*int*) \u2013 number of ORs\n * n_and (*int*) \u2013 number of ANDs\n * bucket_length (*float*) \u2013 bucket length (after projecting on a line)\n * distance_type (*str*) \u2013 euclidean metric is supported.\nget_nearest_items(query_embedding, k=3, collapse_rows=True)\nThis method queries the index with given queries and returns \u2018k\u2019 most relevant documents\nfor each query in the stream. 
While using this method, documents associated with\nthe queries will be updated if new more relevant documents appear.\nIf you don\u2019t want queries results to get updated in the future, take a look at\nget_nearest_items_asof_now.\n* Parameters\n * query_embedding (`ColumnReference`) \u2013 column of embedding vectors precomputed from the query.\n * k (`int`) \u2013 The number of most relevant documents to return for each query.\n Defaults to 3.\n * collapse_rows (`bool`) \u2013 Determines the format of the output. If set to True,\n multiple rows corresponding to a single query will be collapsed into a single row,\n with each column containing a tuple of values from the original rows. If set to False,\n the output will retain the multi-row format for each query. Defaults to True.\n* Returns\n pw.Table\n* If `collapse_rows` is set to True: Returns a table where each row corresponds to a unique query.\nEach column in the row contains a tuple (or list) of values, aggregating up\nto \u2018k\u2019 matches from the dataset.\nFor example:\n```text\n | name | age\n^YYY4HAB... | () | ()\n^X1MXHYY... | ('bluejay', 'cat', 'eagle') | (43, 42, 41)\n```\n* If `collapse_rows` is set to False: Returns a table where each row represents a match from the dataset\nfor a given query. 
Multiple rows can correspond to the same query, up to \u2018k\u2019 matches.\nExample:\n```text\nname | age | embedding | query_id\n | | | ^YYY4HAB...\nbluejay | 43 | (4, 3, 2) | ^X1MXHYY...\ncat | 42 | (3, 3, 2) | ^X1MXHYY...\neagle | 41 | (2, 3, 2) | ^X1MXHYY...\n```\nExample:\nCode\n```python\nimport pathway as pw\nimport pandas as pd\ndocuments = pw.debug.table_from_pandas(\n pd.DataFrame.from_records([\n {\"document\": \"document 1\", \"embeddings\":[1,-1, 0]},\n {\"document\": \"document 2\", \"embeddings\":[1, 1, 0]},\n {\"document\": \"document 3\", \"embeddings\":[0, 0, 1]},\n ])\n)\nindex = KNNIndex(documents.embeddings, documents, n_dimensions=3)\nqueries = pw.debug.table_from_pandas(\n pd.DataFrame.from_records([\n {\"query\": \"What is doc 3 about?\", \"embeddings\":[.1, .1, .1]},\n {\"query\": \"What is doc -5 about?\", \"embeddings\":[-1, 10, -10]},\n ])\n)\nrelevant_docs = index.get_nearest_items(queries.embeddings, k=2)\npw.debug.compute_and_print(relevant_docs)\n```\n::\nResult\n```\n | document | embeddings\n^YYY4HAB... | () | ()\n^X1MXHYY... | ('document 2', 'document 3') | ((1, 1, 0), (0, 0, 1))\n```\n::\n::\nget_nearest_items_asof_now(query_embedding, k=3, collapse_rows=True)\nThis method queries the index with given queries and returns \u2018k\u2019 most relevant documents\nfor each query in the stream. The already answered queries are not updated in\nthe future if new documents appear.\n* Parameters\n * query_embedding (`ColumnReference`) \u2013 column of embedding vectors precomputed from the query.\n * k (`int`) \u2013 The number of most relevant documents to return for each query.\n Defaults to 3.\n * collapse_rows (`bool`) \u2013 Determines the format of the output. If set to True,\n multiple rows corresponding to a single query will be collapsed into a single row,\n with each column containing a tuple of values from the original rows. If set to False,\n the output will retain the multi-row format for each query. 
Defaults to True.\nFor examples, see `get_nearest_items`.\n"} -{"doc": "---\ntitle: pathway.io.null package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.null package\nFunctions\npw.io.null.write(table)\nWrites `table`\u2019s stream of updates to the empty sink.\nInside this routine, the data is formatted into the empty object, and then doesn\u2019t\nget written anywhere.\n* Parameters\n table (`Table`) \u2013 Table to be written.\n* Returns\n None\nExample:\nOne (of a very few) examples, where you can probably need this kind of functionality\nif the case when a Pathway program is benchmarked and the IO part needs to be\nsimplified as much as possible.\nIf the table is `table`, the null output can be configured in the following way:\n```python\npw.io.null.write(table) \n```\n"} -{"doc": "---\ntitle: pathway.stdlib.indexing package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.indexing package\nclass pw.indexing.SortedIndex()\nclear(None. Remove all items from D.)\ncopy(a shallow copy of D)\nfromkeys(value=None, /)\nCreate a new dictionary with keys from iterable and values set to value.\nget(key, default=None, /)\nReturn the value for key if key is in the dictionary, else default.\nitems(a set-like object providing a view on D's items)\nkeys(a set-like object providing a view on D's keys)\npop(k, v, remove specified key and return the corresponding value.)\nIf the key is not found, return the default if given; otherwise,\nraise a KeyError.\npopitem()\nRemove and return a (key, value) pair as a 2-tuple.\nPairs are returned in LIFO (last-in, first-out) order.\nRaises KeyError if the dict is empty.\nsetdefault(key, default=None, /)\nInsert key with a value of default if key is not in the dictionary.\nReturn the value for key if key is in the dictionary, else default.\nupdate(FNone. 
Update D from dict/iterable E and F.)\nIf E is present and has a .keys() method, then does: for k in E: D\\[k\\] = E\\[k\\]\nIf E is present and lacks a .keys() method, then does: for k, v in E: D\\[k\\] = v\nIn either case, this is followed by: for k in F: D\\[k\\] = F\\[k\\]\nvalues(an object providing a view on D's values)\nFunctions\npw.indexing.retrieve_prev_next_values(ordered_table, value=None)\nRetrieve, for each row, a pointer to the first row in the ordered_table that contains a non-\u201cNone\u201d value, based on the orders defined by the prev and next columns.\n* Parameters\n * ordered_table (*pw.Table*) \u2013 Table with three columns: value, prev, next.\n The prev and next columns contain pointers to other rows.\n * value (*Optional\\[pw.ColumnReference\\]*) \u2013 Column reference pointing to the column containing values.\n If not provided, assumes the column name is \u201cvalue\u201d.\n* Returns\n *pw.Table* \u2013\n Table with two columns: prev_value and next_value.\n The prev_value column contains the values of the first row, according to the order defined by the column next, with a value different from None.\n The next_value column contains the values of the first row, according to the order defined by the column prev, with a value different from None.\n"} -{"doc": "pathway.stdlib.indexing.sorting module\nclass pw.indexing.sorting.Aggregate()\nclass pw.indexing.sorting.BinsearchOracle()\nclass pw.indexing.sorting.Candidate()\nclass pw.indexing.sorting.ComparisonRet()\nclass pw.indexing.sorting.Hash()\nclass pw.indexing.sorting.Instance()\nclass pw.indexing.sorting.Key()\nclass pw.indexing.sorting.LeftRight()\nclass pw.indexing.sorting.Node()\nclass pw.indexing.sorting.Parent()\nclass pw.indexing.sorting.PrefixSumOracle()\nclass pw.indexing.sorting.PrevNext()\nclass pw.indexing.sorting.SortedIndex()\nclear(None. 
Remove all items from D.)\ncopy(a shallow copy of D)\nfromkeys(value=None, /)\nCreate a new dictionary with keys from iterable and values set to value.\nget(key, default=None, /)\nReturn the value for key if key is in the dictionary, else default.\nitems(a set-like object providing a view on D's items)\nkeys(a set-like object providing a view on D's keys)\npop(k, v, remove specified key and return the corresponding value.)\nIf the key is not found, return the default if given; otherwise,\nraise a KeyError.\npopitem()\nRemove and return a (key, value) pair as a 2-tuple.\nPairs are returned in LIFO (last-in, first-out) order.\nRaises KeyError if the dict is empty.\nsetdefault(key, default=None, /)\nInsert key with a value of default if key is not in the dictionary.\nReturn the value for key if key is in the dictionary, else default.\nupdate(FNone. Update D from dict/iterable E and F.)\nIf E is present and has a .keys() method, then does: for k in E: D\\[k\\] = E\\[k\\]\nIf E is present and lacks a .keys() method, then does: for k, v in E: D\\[k\\] = v\nIn either case, this is followed by: for k in F: D\\[k\\] = F\\[k\\]\nvalues(an object providing a view on D's values)\nclass pw.indexing.sorting.Value()\npw.indexing.sorting.retrieve_prev_next_values(ordered_table, value=None)\nRetrieve, for each row, a pointer to the first row in the ordered_table that contains a non-\u201cNone\u201d value, based on the orders defined by the prev and next columns.\n* Parameters\n * ordered_table (*pw.Table*) \u2013 Table with three columns: value, prev, next.\n The prev and next columns contain pointers to other rows.\n * value (*Optional\\[pw.ColumnReference\\]*) \u2013 Column reference pointing to the column containing values.\n If not provided, assumes the column name is \u201cvalue\u201d.\n* Returns\n *pw.Table* \u2013\n Table with two columns: prev_value and next_value.\n The prev_value column contains the values of the first row, according to the order defined by the column 
next, with a value different from None.\n The next_value column contains the values of the first row, according to the order defined by the column prev, with a value different from None.\n"} -{"doc": "Usage\nReducers are used in `reduce` to compute the aggregated results obtained by a `groupby`:\n```python\nimport pathway as pw\n```\n```python\nmy_table.groupby(table.columnA).reduce(aggregated_result=pw.reducers.my_reducer(my_table.columnB))\n```\nWe use the following table `t` in the examples:\n```python\nt = pw.debug.table_from_markdown(\n \"\"\"\n | colA | colB | colC | colD\n 1 | valA | -1 | 5 | 4\n 2 | valA | 1 | 5 | 7\n 3 | valA | 2 | 5 | -3\n 4 | valB | 4 | 10 | 2\n 5 | valB | 4 | 10 | 6\n 6 | valB | 7 | 10 | 1\n \"\"\"\n)\npw.debug.compute_and_print(t)\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | colA | colB | colC | colD\n ^YYY4HAB... | valA | -1 | 5 | 4\n ^Z3QWT29... | valA | 1 | 5 | 7\n ^3CZ78B4... | valA | 2 | 5 | -3\n ^3HN31E1... | valB | 4 | 10 | 2\n ^3S2X6B2... | valB | 4 | 10 | 6\n ^A984WV0... | valB | 7 | 10 | 1\n"} -{"doc": "`tuple`\nReturn a tuple containing all the aggregated values. Order of values inside a tuple\nis consistent across application to many columns. If optional argument skip_nones is\nset to True, any Nones in aggregated values will be omitted from the result.\n```python\nt.groupby(t.colA).reduce(tuple_colB=pw.reducers.tuple(t.colB), tuple_colD=pw.reducers.tuple(t.colD))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | tuple_colB | tuple_colD\n ^ENHSR8M... | (-1, 1, 2) | (4, 7, -3)\n ^XN617D8... | (4, 4, 7) | (2, 6, 1)\n"} -{"doc": "`sorted_tuple`\nReturn a sorted tuple containing all the aggregated values. If optional argument skip_nones is\nset to True, any Nones in aggregated values will be omitted from the result.\n```python\nt.groupby(t.colA).reduce(tuples=pw.reducers.sorted_tuple(t.colB))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | tuples\n ^ENHSR8M... 
| (-1, 1, 2)\n ^XN617D8... | (4, 4, 7)\n"} -{"doc": "`ndarray`\nReturn an array containing all the aggregated values. Order of values inside an array\nis consistent across application to many columns. If optional argument skip_nones is\nset to True, any Nones in aggregated values will be omitted from the result.\n```python\nt.groupby(t.colA).reduce(tuple_colB=pw.reducers.ndarray(t.colB), tuple_colD=pw.reducers.ndarray(t.colD))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | tuple_colB | tuple_colD\n ^XN617D8... | [4 4 7] | [2 6 1]\n ^ENHSR8M... | [-1 1 2] | [ 4 7 -3]\n"} -{"doc": "`any`\nReturns any of the aggregated values. Values are consistent across application to many columns.\n```python\nt.groupby(t.colA).reduce(any_colB=pw.reducers.any(t.colB), any_colD=pw.reducers.any(t.colD))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | any_colB | any_colD\n ^ENHSR8M... | 2 | -3\n ^XN617D8... | 7 | 1\n"} -{"doc": "`unique`\nReturns aggregated value, if all values are identical. If values are not identical, exception is raised.\n```python\nt.groupby(t.colA).reduce(unique=pw.reducers.unique(t.colC))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | unique\n ^ENHSR8M... | 5\n ^XN617D8... | 10\n```python\nimport numpy as np\n```\n```python\n# ### `sum`\n#\n# Return the sum of the values of aggregated numpy arrays.\nimport pandas as pd\n```\n```python\nnp_table = pw.debug.table_from_pandas(\n pd.DataFrame(\n {\n \"data\": [\n np.array([1, 2, 3]),\n np.array([4, 5, 6]),\n np.array([7, 8, 9]),\n ]\n }\n )\n)\n```\n```python\nnp_table.reduce(data_sum=pw.reducers.sum(np_table.data))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | data_sum\n ^PWSRT42... 
| [12 15 18]\n"} -{"doc": "---\ntitle: pathway.io.minio package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.minio package\nclass pw.io.minio.MinIOSettings(endpoint, bucket_name, access_key, secret_access_key, *, with_path_style=True, region=None)\nStores MinIO bucket connection settings.\n* Parameters\n * endpoint \u2013 Endpoint for the bucket.\n * bucket_name \u2013 Name of a bucket.\n * access_key \u2013 Access key for the bucket.\n * secret_access_key \u2013 Secret access key for the bucket.\n * region \u2013 Region of the bucket.\n * with_path_style \u2013 Whether to use path-style addresses for bucket access. It defaults to True as this is the most widespread way to access MinIO, but can be overridden in case of a custom configuration.\nFunctions\npw.io.minio.read(path, minio_settings, format, *, schema=None, mode='streaming', csv_settings=None, json_field_paths=None, persistent_id=None, autocommit_duration_ms=1500, debug_data=None)\nReads a table from one or several objects from S3 bucket in MinIO.\nIn case the prefix is specified, and there are several objects lying under this\nprefix, their order is determined according to their modification times: the smaller\nthe modification time is, the earlier the file will be passed to the engine.\n* Parameters\n * path (`str`) \u2013 Path to an object or to a folder of objects in MinIO S3 bucket.\n * minio_settings (`MinIOSettings`) \u2013 Connection parameters for the MinIO account and the bucket.\n * format (`str`) \u2013 Format of data to be read. Currently \u201ccsv\u201d, \u201cjson\u201d and \u201cplaintext\u201d\n formats are supported.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new objects under the\n given path prefix. Set it to \u201cstatic\u201d, it will only consider the available\n data and ingest all of it. 
Default value is \u201cstreaming\u201d.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser. This parameter is used only in case\n the specified format is \u201ccsv\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is \u201cjson\u201d, this field allows mapping field names\n to paths in the read JSON object. For each field which requires such a mapping,\n it should be given in the format `<field_name>: <path to be mapped>`,\n where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data (`Any`) \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\nConsider that there is a table, which is stored in CSV format in the min.io S3\nbucket. 
Then, you can use this method in order to connect and acquire its contents.\nIt may look as follows:\n```python\nimport os\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.minio.read(\n \"animals/\",\n minio_settings=pw.io.minio.MinIOSettings(\n bucket_name=\"datasets\",\n endpoint=\"avv749.stackhero-network.com\",\n access_key=os.environ[\"MINIO_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"MINIO_S3_SECRET_ACCESS_KEY\"],\n ),\n format=\"csv\",\n schema=InputSchema,\n)\n```\n"} -{"doc": "---\ntitle: pathway.io.logstash package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.logstash package\nFunctions\npw.io.logstash.write(table, endpoint, n_retries=0, retry_policy=, connect_timeout_ms=None, request_timeout_ms=None)\nSends the stream of updates from the table to HTTP input \nof Logstash. The data is sent in the format of flat JSON objects, with two extra\nfields for time and diff.\n* Parameters\n * table (`Table`) \u2013 table to be tracked;\n * endpoint (`str`) \u2013 Logstash endpoint, accepting entries;\n * n_retries (`int`) \u2013 number of retries in case of failure;\n * retry_policy (`RetryPolicy`) \u2013 policy of delays or backoffs for the retries;\n * connect_timeout_ms (`Optional`\\[`int`\\]) \u2013 connection timeout, specified in milliseconds. In case it\u2019s None, no restrictions on connection duration will be applied;\n * request_timeout_ms (`Optional`\\[`int`\\]) \u2013 request timeout, specified in milliseconds. In case it\u2019s None, no restrictions on request duration will be applied.\nExample:\nSuppose that we need to send the stream of updates to locally installed Logstash.\nFor example, you can use docker-elk \nrepository in order to get the ELK stack up and running at your local machine in a\nfew minutes.\nIf Logstash stack is installed, you need to configure the input pipeline. 
The\nsimplest possible way to do this is to add the following lines in the input plugins\nlist:\n```text\nhttp {\n port => 8012\n}\n```\nThe port is specified for the sake of example and can be changed. Further, we will\nuse 8012 for clarity.\nNow, with the pipeline configured, you can stream the changes into Logstash as\nsimply as:\n```python\npw.io.logstash.write(table, \"http://localhost:8012\") \n```\n"} -{"doc": "---\ntitle: pathway.io.csv package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.csv package\nFunctions\npw.io.csv.read(path, value_columns=None, *, schema=None, csv_settings=None, mode='streaming', object_pattern='*', with_metadata=False, autocommit_duration_ms=1500, persistent_id=None, debug_data=None, id_columns=None, types=None, default_values=None, kwargs)\nReads a table from one or several files with delimiter-separated values.\nIn case the folder is passed to the engine, the order in which files from\nthe directory are processed is determined according to the modification time of\nfiles within this folder: they will be processed by ascending order of\nthe modification time.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to the file or to the folder with files.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Names of the columns to be extracted from the files. \\[will be deprecated soon\\]\n * schema (`Optional`\\[`type`\\[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * id_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser.\n * mode (`str`) \u2013 denotes how the engine polls the new data from the source. 
Currently \u201cstreaming\u201d, \u201cstatic\u201d, and \u201cstreaming_with_deletions\u201d are supported. If set to \u201cstreaming\u201d the engine will wait for the new input files in the directory. On the other hand, \u201cstreaming_with_deletions\u201d mode also tracks file deletions and modifications and reflects them in the state. For example, if a file was deleted, \u201cstreaming_with_deletions\u201d mode will also remove rows obtained by reading this file from the table. Finally, the \u201cstatic\u201d mode will only consider the available data and ingest all of it in one commit. The default value is \u201cstreaming\u201d.\n * object_pattern (`str`) \u2013 Unix shell style pattern for filtering only certain files in the directory. Ignored in case a path to a single file is specified.\n * with_metadata (`bool`) \u2013 When set to true, the connector will add an additional column named `_metadata` to the table. This column will be a JSON field that will contain two optional fields - `created_at` and `modified_at`. These fields will have integral UNIX timestamps for the creation and modification time respectively. Additionally, the column will also have an optional field named `owner` that will contain the name of the file owner (applicable only for Unix). Finally, the column will also contain a field named `path` that will show the full path to the file from where a row was filled.\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. 
\\[will be deprecated soon\\]\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\nConsider you want to read a dataset, stored in the filesystem in a standard CSV\nformat. The dataset contains data about pets and their owners.\nFor the sake of demonstration, you can prepare a small dataset by creating a CSV file\nvia a unix command line tool:\n```bash\nprintf \"id,owner,pet\\n1,Alice,dog\\n2,Bob,dog\\n3,Alice,cat\\n4,Bob,dog\" > dataset.csv\n```\nIn order to read it into Pathway\u2019s table, you can first do the import and then\nuse the pw.io.csv.read method:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.csv.read(\"dataset.csv\", schema=InputSchema, mode=\"static\")\n```\nThen, you can output the table in order to check the correctness of the read:\nCode\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice dog\n Bob dog\nAlice cat\n Bob dog\n```\n::\n::\nNow let\u2019s try something different. Consider you have site access logs stored in a\nseparate folder in several files. 
For the sake of simplicity, a log entry contains\nan access ID, an IP address and the login of the user.\nA dataset, corresponding to the format described above can be generated, thanks to the\nfollowing set of unix commands:\n```bash\nmkdir logs\nprintf \"id,ip,login\\n1,127.0.0.1,alice\\n2,8.8.8.8,alice\" > logs/part_1.csv\nprintf \"id,ip,login\\n3,8.8.8.8,bob\\n4,127.0.0.1,alice\" > logs/part_2.csv\n```\nNow, let\u2019s see how you can use the connector in order to read the content of this\ndirectory into a table:\n```python\nclass InputSchema(pw.Schema):\n ip: str\n login: str\nt = pw.io.csv.read(\"logs/\", schema=InputSchema, mode=\"static\")\n```\nThe only difference is that you specified the name of the directory instead of the\nfile name, as opposed to what you had done in the previous example. It\u2019s that simple!\nBut what if you are working with a real-time system, which generates logs all the time.\nThe logs are being written and after a while they get into the log directory (this is\nalso called \u201clogs rotation\u201d). Now, consider that there is a need to fetch the new files\nfrom this logs directory all the time. Would Pathway handle that? Sure!\nThe only difference would be in the usage of mode flag. So the code\nsnippet will look as follows:\n```python\nt = pw.io.csv.read(\"logs/\", schema=InputSchema, mode=\"streaming\")\n```\nWith this method, you obtain a table updated dynamically. The changes in the logs would incur\nchanges in the Business-Intelligence \u2018BI\u2019-ready data, namely, in the tables you would like to output. 
To see\nhow these changes are reported by Pathway, have a look at the\n\u201cStreams of Updates and Snapshots\u201d\narticle.\npw.io.csv.write(table, filename)\nWrites table\u2019s stream of updates to a file in delimiter-separated values format.\n* Parameters\n * table (`Table`) \u2013 Table to be written.\n * filename (`str` | `PathLike`) \u2013 Path to the target output file.\n* Returns\n None\nExample:\nIn this simple example you can see how table output works.\nFirst, import Pathway and create a table:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice dog \\n 2 9 Bob cat \\n 3 8 Alice cat\")\n```\nConsider you would want to output the stream of changes of this table. In order to do that\nyou simply do:\n```python\npw.io.csv.write(t, \"table.csv\")\n```\nNow, let\u2019s see what you have on the output:\n```bash\ncat table.csv\n```\n```csv\nage,owner,pet,time,diff\n10,\"Alice\",\"dog\",0,1\n9,\"Bob\",\"cat\",0,1\n8,\"Alice\",\"cat\",0,1\n```\nThe first three columns clearly represent the data columns you have. The column time\nrepresents the number of operations minibatch, in which each of the rows was read. In\nthis example, since the data is static: you have 0. The diff is another\nelement of this stream of updates. In this context, it is 1 because all three rows were read from\nthe input. 
All in all, the extra information in `time` and `diff` columns - in this case -\nshows us that in the initial minibatch (`time = 0`), you have read three rows and all of\nthem were added to the collection (`diff = 1`).\n"} -{"doc": "---\ntitle: pathway.io.gdrive package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.gdrive package\nFunctions\npw.io.gdrive.read(object_id, *, mode='streaming', refresh_interval=30, service_user_credentials_file)\nReads a table from a Google Drive directory or file.\nIt will return a table with single column data containing each file in a binary format.\n* Parameters\n * object_id (`str`) \u2013 id of a directory or file. Directories will be scanned recursively.\n * mode (`str`) \u2013 denotes how the engine polls the new data from the source. Currently \u201cstreaming\u201d\n and \u201cstatic\u201d are supported. If set to \u201cstreaming\u201d, it will check for updates, deletions\n and new files every refresh_interval seconds. \u201cstatic\u201d mode will only consider\n the available data and ingest all of it in one commit.\n The default value is \u201cstreaming\u201d.\n * refresh_interval (`int`) \u2013 time in seconds between scans. Applicable if mode is set to \u2018streaming\u2019.\n * service_user_credentials_file (`str`) \u2013 Google API service user json file.\n* Returns\n The table read.\nExample:\n```python\nimport pathway as pw\ntable = pw.io.gdrive.read(\n object_id=\"0BzDTMZY18pgfcGg4ZXFRTDFBX0j\",\n service_user_credentials_file=\"credentials.json\"\n)\n```\n"} -{"doc": "---\ntitle: pathway.io.plaintext package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.plaintext package\nFunctions\npw.io.plaintext.read(path, *, mode='streaming', object_pattern='*', with_metadata=False, persistent_id=None, autocommit_duration_ms=1500, debug_data=None)\nReads a table from a text file or a directory of text files. 
The resulting table\nwill consist of a single column `data`, and have the number of rows equal to the number\nof lines in the file. Each cell will contain a single line from the file.\nIn case the folder is specified, and there are several files placed in the folder,\ntheir order is determined according to their modification times: the smaller the\nmodification time is, the earlier the file will be passed to the engine.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to a file or to a folder.\n * mode (`str`) \u2013 denotes how the engine polls the new data from the source. Currently \u201cstreaming\u201d, \u201cstatic\u201d, and \u201cstreaming_with_deletions\u201d are supported. If set to \u201cstreaming\u201d the engine will wait for the new input files in the directory. On the other hand, \u201cstreaming_with_deletions\u201d mode also tracks file deletions and modifications and reflects them in the state. For example, if a file was deleted, \u201cstreaming_with_deletions\u201d mode will also remove rows obtained by reading this file from the table. Finally, the \u201cstatic\u201d mode will only consider the available data and ingest all of it in one commit. The default value is \u201cstreaming\u201d.\n * object_pattern (`str`) \u2013 Unix shell style pattern for filtering only certain files in the directory. Ignored in case a path to a single file is specified.\n * with_metadata (`bool`) \u2013 When set to true, the connector will add an additional column named `_metadata` to the table. This column will be a JSON field that will contain two optional fields - `created_at` and `modified_at`. These fields will have integral UNIX timestamps for the creation and modification time respectively. Additionally, the column will also have an optional field named `owner` that will contain the name of the file owner (applicable only for Unix). 
Finally, the column will also contain a field named `path` that will show the full path to the file from where a row was filled.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will be persisted or `None`, if there is no need to persist the state of this table. When a program restarts, it restores the state for all input tables according to what was saved for their `persistent_id`. This way it\u2019s possible to configure the start of computations from the moment they were terminated last time.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\n```python\nimport pathway as pw\nt = pw.io.plaintext.read(\"raw_dataset/lines.txt\")\n```\n"} -{"doc": "---\ntitle: pathway.stdlib.graphs package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.graphs package\nclass pw.graphs.Edge()\nBasic edge class, holds pointers to the endpoint vertices.\nclass pw.graphs.Graph(V, E)\nBasic class representing undirected, unweighted (multi)graph.\nclass pw.graphs.Vertex()\nclass pw.graphs.WeightedGraph(V, E, WE)\nBasic class representing undirected, weighted (multi)graph.\n"} -{"doc": "Subpackages\n* pathway.stdlib.graphs.bellman_ford package\n * `DistFromSource`\n * `Vertex`\n * Submodules\n * pathway.stdlib.graphs.bellman_ford.impl module\n * `Dist`\n * `DistFromSource`\n * `Vertex`\n* pathway.stdlib.graphs.louvain_communities package\n * Submodules\n * pathway.stdlib.graphs.louvain_communities.impl module\n * `exact_modularity()`\n* pathway.stdlib.graphs.pagerank package\n * `Result`\n * Submodules\n * pathway.stdlib.graphs.pagerank.impl module\n * `Result`\n"} -{"doc": 
"pathway.stdlib.graphs.common module\nclass pw.graphs.common.Cluster()\nclass pw.graphs.common.Clustering()\nClass describing cluster membership relation:\nvertex u (id-column) belongs to cluster c.\nclass pw.graphs.common.Edge()\nBasic edge class, holds pointers to the endpoint vertices.\nclass pw.graphs.common.Vertex()\nclass pw.graphs.common.Weight()\nBasic weight class. To be used as extension of Vertex / Edge\n"} -{"doc": "pathway.stdlib.graphs.graph module\nclass pw.graphs.graph.Graph(V, E)\nBasic class representing undirected, unweighted (multi)graph.\nclass pw.graphs.graph.WeightedGraph(V, E, WE)\nBasic class representing undirected, weighted (multi)graph.\n"} -{"doc": "---\ntitle: pathway.io.debezium package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.debezium package\nFunctions\npw.io.debezium.read(rdkafka_settings, topic_name, *, db_type=, schema=None, debug_data=None, autocommit_duration_ms=1500, persistent_id=None, value_columns=None, primary_key=None, types=None, default_values=None)\nConnector, which takes a topic in the format of Debezium\nand maintains a corresponding table in Pathway, on which you can do all the\ntable operations provided. In order to do that, you will need a Debezium connector.\n* Parameters\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of\n librdkafka.\n * topic_name (`str`) \u2013 Name of topic in Kafka to which the updates are streamed.\n * db_type (`DebeziumDBType`) \u2013 Type of the database from which events are streamed.\n * schema (`Optional`\\[`type`\\[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nExample:\nConsider there is a need to stream a database table along with its changes directly into\nthe Pathway engine. One of the standard well-known solutions for table streaming is\nDebezium:\nit supports streaming data from MySQL, Postgres, MongoDB and a few more databases directly to a\ntopic in Kafka. 
The streaming first sends a snapshot of the data and then streams\nchanges for the specific changed (namely: inserted, updated or removed) rows.\nConsider there is a table in Postgres, which is\ncreated according to the following schema:\n```sql\nCREATE TABLE pets (\n id SERIAL PRIMARY KEY,\n age INTEGER,\n owner TEXT,\n pet TEXT\n);\n```\nThis table, by default, will be streamed to the topic with the same name. In order to\nread it, you need to set the settings for `rdkafka`. For the sake of demonstration,\nlet\u2019s take those from the example of the Kafka connector:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"sasl_ssl\",\n \"sasl.mechanism\": \"SCRAM-SHA-256\",\n \"group.id\": \"$GROUP_NAME\",\n \"session.timeout.ms\": \"60000\",\n \"sasl.username\": os.environ[\"KAFKA_USERNAME\"],\n \"sasl.password\": os.environ[\"KAFKA_PASSWORD\"]\n}\n```\nNow, using the settings you can set up a connector. It is as simple as:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n id: str = pw.column_definition(primary_key=True)\n age: int\n owner: str\n pet: str\nt = pw.io.debezium.read(\n rdkafka_settings,\n topic_name=\"pets\",\n schema=InputSchema\n)\n```\nAs a result, upon its start, the connector would provide the full snapshot of the\ntable `pets` into the table `t` in Pathway. The table `t` can then be operated as\nusual. Throughout the run time, the rows in the table `pets` can change. 
In this\ncase, the changes in the result will be provided in the output connectors by the\nStream of Updates mechanism.\n"} -{"doc": "---\ntitle: pathway.io.postgres package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.postgres package\nFunctions\npw.io.postgres.write(table, postgres_settings, table_name, max_batch_size=None)\nWrites `table`\u2019s stream of updates to a postgres table.\nIn order for write to be successful, it is required that the table contains `time`\nand `diff` columns of the integer type - you can refer to the article\n\u201cStreams of Updates and Snapshots\u201d\nto learn more about the reasoning behind it.\n* Parameters\n * postgres_settings (`dict`) \u2013 Components for the connection string for Postgres.\n * table_name (`str`) \u2013 Name of the target table.\n * max_batch_size (`Optional`\\[`int`\\]) \u2013 Maximum number of entries allowed to be committed within a single transaction.\n* Returns\n None\nExample:\nConsider there\u2019s a need to output a stream of updates from a table in Pathway to\na table in Postgres. Let\u2019s see how this can be done with the connector.\nFirst of all, one needs to provide the required credentials for Postgres\nconnection string.\nWhile the connection string can include a wide variety of settings, such as SSL\nor connection timeouts, in this example we will keep it simple and provide the\nsmallest example possible. 
Suppose that the database is running locally on the standard\nport 5432, that it has the name `database` and is accessible under the username\n`user` with a password `pass`.\nIt gives us the following content for the connection string:\n```python\nconnection_string_parts = {\n \"host\": \"localhost\",\n \"port\": \"5432\",\n \"dbname\": \"database\",\n \"user\": \"user\",\n \"password\": \"pass\",\n}\n```\nNow let\u2019s load a table, which we will output to the database:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice 1 \\n 2 9 Bob 1 \\n 3 8 Alice 2\")\n```\nIn order to output the table, we will need to create a new table in the database. The table\nwould need to have all the columns that the output data has. Moreover it will need\ninteger columns `time` and `diff`, because these values are an essential part of the\noutput. Finally, it is also a good idea to create the sequential primary key for\nour changes so that we know the updates\u2019 order.\nTo sum things up, the table creation boils down to the following SQL command:\n```sql\nCREATE TABLE pets (\n id SERIAL PRIMARY KEY,\n time INTEGER NOT NULL,\n diff INTEGER NOT NULL,\n age INTEGER,\n owner TEXT,\n pet TEXT\n);\n```\nNow, having done all the preparation, one can simply call:\n```python\npw.io.postgres.write(\n t,\n connection_string_parts,\n \"pets\",\n)\n```\npw.io.postgres.write_snapshot(table, postgres_settings, table_name, primary_key, max_batch_size=None)\nMaintains a snapshot of a table within a Postgres table.\nIn order for write to be successful, it is required that the table contains `time`\nand `diff` columns of the integer type - you can refer to the article\n\u201cStreams of Updates and Snapshots\u201d\nto understand the reasoning behind it.\n* Parameters\n * postgres_settings (`dict`) \u2013 Components of the connection string for Postgres.\n * table_name (`str`) \u2013 Name of the target table.\n * primary_key (`list`\\[`str`\\]) \u2013 
Names of the fields which serve as a primary key in the Postgres table.\n * max_batch_size (`Optional`\\[`int`\\]) \u2013 Maximum number of entries allowed to be committed within a single transaction.\n* Returns\n None\nExample:\nConsider there is a table `stats` in Pathway, containing the average number of requests to some\nservice or operation per user, over some period of time. The number of requests\ncan be large, so we decide not to store the whole stream of changes, but to only store\na snapshot of the data, which can be actualized by Pathway.\nThe minimum set-up would require us to have a Postgres table with two columns: the ID\nof the user `user_id` and the number of requests across some period of time `number_of_requests`.\nIn order to maintain consistency, we also need two extra columns: `time` and `diff`.\nThe SQL for the creation of such table would look as follows:\n```sql\nCREATE TABLE user_stats (\n user_id TEXT PRIMARY KEY,\n number_of_requests INTEGER,\n time INTEGER NOT NULL,\n diff INTEGER NOT NULL\n);\n```\nAfter the table is created, all you need is just to set up the output connector:\n```python\nimport pathway as pw\npw.io.postgres.write_snapshot( \n stats,\n {\n \"host\": \"localhost\",\n \"port\": \"5432\",\n \"dbname\": \"database\",\n \"user\": \"user\",\n \"password\": \"pass\",\n },\n \"user_stats\",\n [\"user_id\"],\n)\n```\n"} -{"doc": "---\ntitle: pathway.io.jsonlines package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.jsonlines package\nFunctions\npw.io.jsonlines.read(path, *, schema=None, mode='streaming', json_field_paths=None, object_pattern='*', with_metadata=False, autocommit_duration_ms=1500, persistent_id=None, debug_data=None, value_columns=None, primary_key=None, types=None, default_values=None)\nReads a table from one or several files in jsonlines format.\nIn case the folder is passed to the engine, the order in which files from\nthe directory are processed is determined according to the modification time 
of\nfiles within this folder: they will be processed by ascending order of\nthe modification time.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to the file or to the folder with files.\n * schema (`Optional`\\[`type`\\[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 denotes how the engine polls the new data from the source. Currently \u201cstreaming\u201d, \u201cstatic\u201d, and \u201cstreaming_with_deletions\u201d are supported. If set to \u201cstreaming\u201d the engine will wait for the new input files in the directory. On the other hand, \u201cstreaming_with_deletions\u201d mode also tracks file deletions and modifications and reflects them in the state. For example, if a file was deleted, \u201cstreaming_with_deletions\u201d mode will also remove rows obtained by reading this file from the table. Finally, the \u201cstatic\u201d mode will only consider the available data and ingest all of it in one commit. The default value is \u201cstreaming\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 This field allows mapping field names to paths within the JSON object.\n For the fields which require such mapping, it should be given in the format\n `<field_name>: <path to be mapped>`, where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * object_pattern (`str`) \u2013 Unix shell style pattern for filtering only certain files in the directory. Ignored in case a path to a single file is specified.\n * with_metadata (`bool`) \u2013 When set to true, the connector will add an additional column named `_metadata` to the table. This column will be a JSON field that will contain two optional fields - `created_at` and `modified_at`. These fields will have integral UNIX timestamps for the creation and modification time respectively. Additionally, the column will also have an optional field named `owner` that will contain the name of the file owner (applicable only for Unix-like systems). 
Finally, the column will also contain a field named `path` that will show the full path to the file from where a row was filled.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Names of the columns to be extracted from the files. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. 
\\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nExample:\nConsider you want to read a dataset, stored in the filesystem in a jsonlines\nformat. The dataset contains data about pets and their owners.\nFor the sake of demonstration, you can prepare a small dataset by creating a jsonlines\nfile via a unix command line tool:\n```bash\nprintf \"{\\\"id\\\":1,\\\"owner\\\":\\\"Alice\\\",\\\"pet\\\":\\\"dog\\\"}\n{\\\"id\\\":2,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"dog\\\"}\n{\\\"id\\\":3,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"cat\\\"}\n{\\\"id\\\":4,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"cat\\\"}\" > dataset.jsonlines\n```\nIn order to read it into Pathway\u2019s table, you can first do the import and then\nuse the `pw.io.jsonlines.read` method:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.jsonlines.read(\"dataset.jsonlines\", schema=InputSchema, mode=\"static\")\n```\nThen, you can output the table in order to check the correctness of the read:\nCode\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner | pet\nAlice | dog\nBob | dog\nBob | cat\nBob | cat\n```\n::\n::\nNow let\u2019s try something different. Consider you have site access logs stored in a\nseparate folder in several files. 
For the sake of simplicity, a log entry contains\nan access ID, an IP address and the login of the user.\nA dataset, corresponding to the format described above can be generated, thanks to the\nfollowing set of unix commands:\n```bash\nmkdir logs\nprintf \"{\\\"id\\\":1,\\\"ip\\\":\\\"127.0.0.1\\\",\\\"login\\\":\\\"alice\\\"}\n{\\\"id\\\":2,\\\"ip\\\":\\\"8.8.8.8\\\",\\\"login\\\":\\\"alice\\\"}\" > logs/part_1.jsonlines\nprintf \"{\\\"id\\\":3,\\\"ip\\\":\\\"8.8.8.8\\\",\\\"login\\\":\\\"bob\\\"}\n{\\\"id\\\":4,\\\"ip\\\":\\\"127.0.0.1\\\",\\\"login\\\":\\\"alice\\\"}\" > logs/part_2.jsonlines\n```\nNow, let\u2019s see how you can use the connector in order to read the content of this\ndirectory into a table:\n```python\nclass InputSchema(pw.Schema):\n ip: str\n login: str\nt = pw.io.jsonlines.read(\"logs/\", schema=InputSchema, mode=\"static\")\n```\nThe only difference is that you specified the name of the directory instead of the\nfile name, as opposed to what you had done in the previous example. It\u2019s that simple!\nBut what if you are working with a real-time system, which generates logs all the time.\nThe logs are being written and after a while they get into the log directory (this is\nalso called \u201clogs rotation\u201d). Now, consider that there is a need to fetch the new files\nfrom this logs directory all the time. Would Pathway handle that? Sure!\nThe only difference would be in the usage of `mode` flag. So the code\nsnippet will look as follows:\n```python\nclass InputSchema(pw.Schema):\n ip: str\n login: str\nt = pw.io.jsonlines.read(\"logs/\", schema=InputSchema, mode=\"streaming\")\n```\nWith this method, you obtain a table updated dynamically. The changes in the logs would incur\nchanges in the Business-Intelligence \u2018BI\u2019-ready data, namely, in the tables you would like to output. 
To see\nhow these changes are reported by Pathway, have a look at the\n\u201cStreams of Updates and Snapshots\u201d\narticle.\npw.io.jsonlines.write(table, filename)\nWrites `table`\u2019s stream of updates to a file in jsonlines format.\n* Parameters\n * table (`Table`) \u2013 Table to be written.\n * filename (`str` | `PathLike`) \u2013 Path to the target output file.\n* Returns\n None\nExample:\nIn this simple example you can see how table output works.\nFirst, import Pathway and create a table:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice dog \\n 2 9 Bob cat \\n 3 8 Alice cat\")\n```\nConsider you would want to output the stream of changes of this table. In order to do that\nyou simply do:\n```python\npw.io.jsonlines.write(t, \"table.jsonlines\")\n```\nNow, let\u2019s see what you have on the output:\n```bash\ncat table.jsonlines\n```\n```json\n{\"age\":10,\"owner\":\"Alice\",\"pet\":\"dog\",\"diff\":1,\"time\":0}\n{\"age\":9,\"owner\":\"Bob\",\"pet\":\"cat\",\"diff\":1,\"time\":0}\n{\"age\":8,\"owner\":\"Alice\",\"pet\":\"cat\",\"diff\":1,\"time\":0}\n```\nThe columns age, owner and pet clearly represent the data columns you have. The\ncolumn time represents the number of operations minibatch, in which each of the\nrows was read. In this example, since the data is static: you have 0. The diff is\nanother element of this stream of updates. In this context, it is 1 because all\nthree rows were read from the input. 
All in all, the extra information in `time` and\n`diff` columns - in this case - shows us that in the initial minibatch (`time = 0`),\nyou have read three rows and all of them were added to the collection (`diff = 1`).\n"} -{"doc": "Notes\nThe CSV files should follow the standard CSV settings: the separator is \u2018,\u2019, the\nquotechar is \u2018\u201d\u2019, and there is no escape.\npw.demo.replay_csv_with_time(path, *, schema, time_column, unit='s', autocommit_ms=100, speedup=1)\nReplay a static CSV file as a data stream while respecting the time between updates based on a timestamp column.\nThe timestamps in the file should be ordered positive integers.\n* Parameters\n * path (`str`) \u2013 Path to the file to stream.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * time_column (`str`) \u2013 Column containing the timestamps.\n * unit (`str`) \u2013 Unit of the timestamps. Only \u2018s\u2019, \u2018ms\u2019, \u2018us\u2019, and \u2018ns\u2019 are supported. Defaults to \u2018s\u2019.\n * autocommit_duration_ms \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * speedup (`float`) \u2013 Produce the stream `speedup` times faster than it would result from the time column.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "---\ntitle: pathway.demo package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.demo package\nPathway demo module\nTypical use:\n```python\nclass InputSchema(pw.Schema):\n name: str\n age: int\npw.demo.replay_csv(\"./input_stream.csv\", schema=InputSchema)\n```\n::\nResult\n```\n<pathway.Table schema={'name': <class 'str'>, 'age': <class 'int'>}>\n```\n::\n::\nFunctions\npw.demo.generate_custom_stream(value_generators, *, schema, nb_rows=None, autocommit_duration_ms=1000, input_rate=1.0, persistent_id=None)\nGenerates a data stream.\nThe generator creates a table and periodically streams rows.\nIf a `nb_rows` value is provided, there are `nb_rows` rows generated in total,\nelse the generator streams indefinitely.\nThe rows are generated iteratively and have an associated index x, starting from 0.\nThe values of each column are generated by their associated function in `value_generators`.\n* Parameters\n * value_generators (`dict`\\[`str`, `Any`\\]) \u2013 Dictionary mapping column names to functions that generate values for each column.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * nb_rows (`Optional`\\[`int`\\]) \u2013 The number of rows to generate. Defaults to None. If set to None, the generator\n generates streams indefinitely.\n * types \u2013 Dictionary containing the mapping between the columns and the data types (`pw.Type`) of the values of those columns. This parameter is optional, and if not provided the default type is `pw.Type.ANY`.\n * autocommit_duration_ms (`int`) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * input_rate (`float`) \u2013 The rate at which rows are generated per second. Defaults to 1.0.\n* Returns\n *Table* \u2013 The generated table.\nExample:\n"} -{"doc": "---\ntitle: pathway.demo package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.demo package\nPathway demo module\nTypical use:\n```python\nvalue_functions = {\n 'number': lambda x: x + 1,\n 'name': lambda x: f'Person {x}',\n 'age': lambda x: 20 + x,\n}\nclass InputSchema(pw.Schema):\n number: int\n name: str\n age: int\npw.demo.generate_custom_stream(value_functions, schema=InputSchema, nb_rows=10)\n```\n::\nResult\n```\n<pathway.Table schema={'number': <class 'int'>, 'name': <class 'str'>, 'age': <class 'int'>}>\n```\n::\n::\nIn the above example, a data stream is generated with 10 rows, where each row has columns \u2018number\u2019, \u2018name\u2019, and \u2018age\u2019.\nThe \u2018number\u2019 column contains values incremented by 1 from 1 to 10, the \u2018name\u2019 column contains \u2018Person\u2019\nfollowed by the respective row index, and the \u2018age\u2019 column contains values starting from 20 incremented by\nthe row index.\npw.demo.noisy_linear_stream(nb_rows=10, input_rate=1.0)\nGenerates an artificial data stream for the linear regression tutorial.\n* Parameters\n * nb_rows (*int, optional*) \u2013 The number of rows to generate in the data stream. Defaults to 10.\n * input_rate (*float, optional*) \u2013 The rate at which rows are generated per second. Defaults to 1.0.\n* Returns\n *pw.Table* \u2013 A table containing the generated data stream.\nExample:\n```python\ntable = pw.demo.noisy_linear_stream(nb_rows=100, input_rate=2.0)\n```\nIn the above example, an artificial data stream is generated with 100 rows. 
Each row has two columns, \u2018x\u2019 and \u2018y\u2019.\nThe \u2018x\u2019 values range from 0 to 99, and the \u2018y\u2019 values are equal to \u2018x\u2019 plus some random noise.\npw.demo.range_stream(nb_rows=30, offset=0, input_rate=1.0)\nGenerates a simple artificial data stream, used to compute the sum in our examples.\n* Parameters\n * nb_rows (*int, optional*) \u2013 The number of rows to generate in the data stream. Defaults to 30.\n * offset (*int, optional*) \u2013 The offset value added to the generated \u2018value\u2019 column. Defaults to 0.\n * input_rate (*float, optional*) \u2013 The rate at which rows are generated per second. Defaults to 1.0.\n* Returns\n *pw.Table* \u2013 a table containing the generated data stream.\nExample:\n```python\ntable = pw.demo.range_stream(nb_rows=50, offset=10, input_rate=2.5)\n```\nIn the above example, an artificial data stream is generated with a single column \u2018value\u2019 and 50 rows.\nThe \u2018value\u2019 column contains values ranging from \u2018offset\u2019 (10 in this case) to \u2018nb_rows\u2019 + \u2018offset\u2019 (60).\npw.demo.replay_csv(path, *, schema, input_rate=1.0)\nReplay a static CSV file as a data stream.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to the file to stream.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * autocommit_duration_ms \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * input_rate (*float, optional*) \u2013 The rate at which rows are read per second. 
Defaults to 1.0.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "pathway.stdlib.temporal.utils module\npw.temporal.utils.check_joint_types(parameters)\nChecks if all parameters have types that allow to execute a function.\nIf parameters are {\u2018a\u2019: (a, TimeEventType), \u2018b\u2019: (b, IntervalType)} then\nthe following pairs of types are allowed for (a, b): (int, int), (float, float),\n(datetime.datetime, datetime.timedelta)\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n how=pw.JoinMode.LEFT,\n defaults={t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 6 | 11\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n```\n::\n::\nclass pw.temporal.AsofNowJoinResult(original_left, left, right, join_result, table_substitution, mode, id)\nResult of an asof now join between tables.\nselect(*args, kwargs)\nComputes a result of an asof now join.\n* Parameters\n * args (`ColumnReference`) \u2013 Column references.\n * kwargs 
(`ColumnExpression`) \u2013 Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nclass pw.temporal.CommonBehavior(delay, cutoff, keep_results)\nDefines temporal behavior of windows and temporal joins.\nclass pw.temporal.Direction(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.temporal.IntervalJoinResult(left_bucketed, right_bucketed, earlier_part_filtered, later_part_filtered, table_substitution, mode, _filter_out_results_of_forgetting)\nResult of an interval join between tables.\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\njoin_result = t1.interval_join_inner(t2, t1.t, t2.t, pw.temporal.interval(-2, 1))\nisinstance(join_result, pw.temporal.IntervalJoinResult)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\npw.debug.compute_and_print(\n join_result.select(left_t=t1.t, right_t=t2.t), include_id=False\n)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\nselect(*args, kwargs)\nComputes a result of an interval join.\n* Parameters\n * args (`ColumnReference`) \u2013 Column references.\n * kwargs (`Any`) \u2013 Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 
'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_inner(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\nclass pw.temporal.Window()\nclass pw.temporal.WindowJoinResult(join_result, left_original, right_original, left_new, right_new)\nResult of a window join between tables.\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\njoin_result = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.tumbling(2))\nisinstance(join_result, pw.temporal.WindowJoinResult)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\npw.debug.compute_and_print(\n join_result.select(left_t=t1.t, right_t=t2.t), 
include_id=False\n)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n1 |\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n13 |\n```\n::\n::\nselect(*args, kwargs)\nComputes a result of a window join.\n:type args: `ColumnReference`\n:param args: Column references.\n:type kwargs: `Any`\n:param kwargs: Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n>>> import pathway as pw\n>>> t1 = pw.debug.table_from_markdown(\n\u2026 \u2018\u2019\u2019\n\u2026 | a | t\n\u2026 1 | 1 | 1\n\u2026 2 | 1 | 2\n\u2026 3 | 1 | 3\n\u2026 4 | 1 | 7\n\u2026 5 | 1 | 13\n\u2026 6 | 2 | 1\n\u2026 7 | 2 | 2\n\u2026 8 | 3 | 4\n\u2026 \u2018\u2019\u2019\n\u2026 )\n>>> t2 = pw.debug.table_from_markdown(\n\u2026 \u2018\u2019\u2019\n\u2026 | b | t\n\u2026 1 | 1 | 2\n\u2026 2 | 1 | 5\n\u2026 3 | 1 | 6\n\u2026 4 | 1 | 7\n\u2026 5 | 2 | 2\n\u2026 6 | 2 | 3\n\u2026 7 | 4 | 3\n\u2026 \u2018\u2019\u2019\n\u2026 )\n>>> t3 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n\u2026 key=pw.coalesce(t1.a, t2.b), left_t=t1.t, right_t=t2.t\n\u2026 )\n>>> pw.debug.compute_and_print(t3, include_id=False)\nkey | left_t | right_t\n1 | | 5\n1 | 1 |\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n1 | 13 |\n2 | 1 |\n2 | 2 | 2\n2 | 2 | 3\n3 | 4 |\n4 | | 3\nFunctions\npw.temporal.asof_join(self, other, self_time, other_time, *on, how, defaults={}, direction=Direction.BACKWARD)\nPerform an ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * how (`JoinMode`) \u2013 mode of the join (LEFT, RIGHT, FULL)\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. If no\n default is provided, None will be used.\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n how=pw.JoinMode.LEFT,\n defaults={t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 6 | 11\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n```\n::\n::\npw.temporal.asof_join_left(self, other, self_time, other_time, *on, defaults={}, direction=Direction.BACKWARD)\nPerform a left ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column 
expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_left(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 6 | 11\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n```\n::\n::\npw.temporal.asof_join_outer(self, other, self_time, other_time, *on, 
defaults={}, direction=Direction.BACKWARD)\nPerform an outer ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_outer(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t1.val: -1, t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 
1 | -1 | 0\n0 | 2 | 1 | 0 | 1\n0 | 3 | 1 | 6 | 7\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 2 | 7\n0 | 7 | 5 | 6 | 11\n0 | 8 | 5 | 3 | 8\n0 | 9 | 5 | 9 | 14\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n0 | 13 | 7 | 7 | 14\n0 | 14 | 7 | 4 | 11\n1 | 2 | -1 | 7 | 6\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n1 | 8 | 9 | 3 | 12\n```\n::\n::\npw.temporal.asof_join_right(self, other, self_time, other_time, *on, defaults={}, direction=Direction.BACKWARD)\nPerform a right ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. 
If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_right(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t1.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 2 | 1 | 0 | 1\n0 | 3 | 1 | 6 | 7\n0 | 7 | 5 | 2 | 7\n0 | 8 | 5 | 3 | 8\n0 | 9 | 5 | 9 | 14\n0 | 13 | 7 | 7 | 14\n0 | 14 | 7 | 4 | 11\n1 | 2 | -1 | 7 | 6\n1 | 8 | 9 | 3 | 12\n```\n::\n::\npw.temporal.asof_now_join(self, other, *on, how=JoinMode.INNER, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. Rows from self are not stored.\nThey are joined with rows of other at their processing time. If other is updated\nin the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. 
They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n * how (`JoinMode`) \u2013 by default, inner join is performed. Possible values are JoinMode.{INNER,LEFT}\n which correspond to inner and left join respectively.\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\npw.temporal.asof_now_join_inner(self, other, *on, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. Rows from self are not stored.\nThey are joined with rows of other at their processing time. If other is updated\nin the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\npw.temporal.asof_now_join_left(self, other, *on, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. 
If there are no matching\nrows in other, missing values on the right side are replaced with None.\nRows from self are not stored. They are joined with rows of other at their processing\ntime. If other is updated in the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\npw.temporal.common_behavior(delay=None, cutoff=None, keep_results=True)\nCreates CommonBehavior\n* Parameters\n * delay (`Union`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 Optional; for windows, delays initial output by `delay` with respect to the\n beginning of the window. Setting it to `None` does not enable\n delaying mechanism.\n For interval joins, it delays the time the record is joined by `delay`.\n Using delay is useful when updates are too frequent.\n * cutoff (`Union`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 Optional; for windows, stops updating windows which end earlier than maximal\n seen time minus `cutoff`. Setting cutoff to `None` does not enable\n cutoff mechanism.\n For interval joins, it ignores entries that are older\n than maximal seen time minus `cutoff`. This parameter is also used to clear\n memory. It allows releasing memory used by entries that won\u2019t change.\n * keep_results (`bool`) \u2013 If set to True, keeps all results of the operator. 
If set to False,\n keeps only results that are newer than maximal seen time minus `cutoff`.\n Can\u2019t be set to `False`, when `cutoff` is `None`.\npw.temporal.interval(lower_bound, upper_bound)\nAllows testing whether two times are within a certain distance.\nNOTE: Usually used as an argument of .interval_join().\n* Parameters\n * lower_bound (`int` | `float` | `timedelta`) \u2013 a lower bound on other_time - self_time.\n * upper_bound (`int` | `float` | `timedelta`) \u2013 an upper bound on other_time - self_time.\n* Returns\n *Window* \u2013 object to pass as an argument to .interval_join()\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\npw.temporal.interval_join(self, other, self_time, other_time, interval, *on, behavior=None, how=JoinMode.INNER)\nPerforms an interval join of self with other using a time difference\nand join expressions. 
If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n * how (`JoinMode`) \u2013 decides whether to run interval_join_inner, interval_join_left, interval_join_right\n or interval_join_outer. Default is INNER.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b, how=pw.JoinMode.INNER\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\npw.temporal.interval_join_inner(self, other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval join of self with other using a time difference\nand join expressions. 
If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_inner(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = 
pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_inner(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\npw.temporal.interval_join_left(self, other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval left join of self with other using a time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows from the left\nside that haven\u2019t been matched with the right side are returned with missing\nvalues on the right side replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_left(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n11 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_left(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n1 | 11 |\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n3 | 4 |\n```\n::\n::\npw.temporal.interval_join_outer(self, other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval outer join of self with other using a time difference\nand join expressions. 
If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows that haven\u2019t\nbeen matched with the other side are returned with missing values on the other\nside replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_outer(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 0\n | 7\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n11 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_outer(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n | | 0\n | | 2\n | | 7\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n1 | 11 |\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n3 | 4 |\n```\n::\n::\npw.temporal.interval_join_right(self, other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval right join of self with other using a time difference\nand join expressions. 
If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows from the right\nside that haven\u2019t been matched with the left side are returned with missing\nvalues on the left side replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_right(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 0\n | 7\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_right(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n | | 0\n | | 2\n | | 7\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\npw.temporal.intervals_over(*, at, lower_bound, upper_bound, is_outer=True)\nAllows grouping together elements within a window.\nWindows are created for each time t in at, by taking values with times\nwithin \\[t+lower_bound, t+upper_bound\\].\nNote: If a tuple reducer will be 
used on grouped elements within a window, values\nin the tuple will be sorted according to their time column.\n* Parameters\n * lower_bound (`int` | `float` | `timedelta`) \u2013 lower bound for interval\n * upper_bound (`int` | `float` | `timedelta`) \u2013 upper bound for interval\n * at (`ColumnReference`) \u2013 column of times for which windows are to be created\n * is_outer (`bool`) \u2013 decides whether empty windows should return None or be omitted\n* Returns\n *Window* \u2013 object to pass as an argument to .windowby()\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n'''\n | t | v\n1 | 1 | 10\n2 | 2 | 1\n3 | 4 | 3\n4 | 8 | 2\n5 | 9 | 4\n6 | 10| 8\n7 | 1 | 9\n8 | 2 | 16\n''')\nprobes = pw.debug.table_from_markdown(\n'''\nt\n2\n4\n6\n8\n10\n''')\nresult = (\n pw.temporal.windowby(t, t.t, window=pw.temporal.intervals_over(\n at=probes.t, lower_bound=-2, upper_bound=1\n ))\n .reduce(pw.this._pw_window_location, v=pw.reducers.tuple(pw.this.v))\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\n_pw_window_location | v\n2 | (9, 10, 16, 1)\n4 | (16, 1, 3)\n6 | (3,)\n8 | (2, 4)\n10 | (2, 4, 8)\n```\n::\n::\npw.temporal.session(*, predicate=None, max_gap=None)\nAllows grouping together elements within a window across ordered time-like\ndata column by locally grouping adjacent elements either based on a maximum time\ndifference or using a custom predicate.\nNOTE: Usually used as an argument of .windowby().\nExactly one of the arguments predicate or max_gap should be provided.\n* Parameters\n * predicate (`Optional`\\[`Callable`\\[\\[`Any`, `Any`\\], `bool`\\]\\]) \u2013 function taking two adjacent entries that returns a boolean saying\n whether the two 
entries should be grouped\n * max_gap (`UnionType`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 Two adjacent entries will be grouped if b - a < max_gap\n* Returns\n *Window* \u2013 object to pass as an argument to .windowby()\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n'''\n | shard | t | v\n1 | 0 | 1 | 10\n2 | 0 | 2 | 1\n3 | 0 | 4 | 3\n4 | 0 | 8 | 2\n5 | 0 | 9 | 4\n6 | 0 | 10| 8\n7 | 1 | 1 | 9\n8 | 1 | 2 | 16\n''')\nresult = t.windowby(\n t.t, window=pw.temporal.session(predicate=lambda a, b: abs(a-b) <= 1), shard=t.shard\n).reduce(\npw.this._pw_shard,\npw.this._pw_window_start,\npw.this._pw_window_end,\nmin_t=pw.reducers.min(pw.this.t),\nmax_v=pw.reducers.max(pw.this.v),\ncount=pw.reducers.count(),\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\n_pw_shard | _pw_window_start | _pw_window_end | min_t | max_v | count\n0 | 1 | 2 | 1 | 10 | 2\n0 | 4 | 4 | 4 | 3 | 1\n0 | 8 | 10 | 8 | 8 | 3\n1 | 1 | 2 | 1 | 16 | 2\n```\n::\n::\npw.temporal.sliding(hop, duration=None, ratio=None, offset=None)\nAllows grouping together elements within a window of a given length sliding\nacross ordered time-like data column according to a specified interval (hop)\nstarting from a given offset.\nNOTE: Usually used as an argument of .windowby().\nExactly one of the arguments duration or ratio should be provided.\n* Parameters\n * hop (`int` | `float` | `timedelta`) \u2013 frequency of a window\n * duration (`UnionType`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 length of the window\n * ratio (`Optional`\\[`int`\\]) \u2013 used as an alternative way to specify duration as hop \\* ratio\n * offset (`UnionType`\\[`int`, `float`, `datetime`, `None`\\]) \u2013 beginning of the 
first window\n* Returns\n *Window* \u2013 object to pass as an argument to .windowby()\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n'''\n | shard | t\n 1 | 0 | 12\n 2 | 0 | 13\n 3 | 0 | 14\n 4 | 0 | 15\n 5 | 0 | 16\n 6 | 0 | 17\n 7 | 1 | 10\n 8 | 1 | 11\n''')\nresult = t.windowby(\n t.t, window=pw.temporal.sliding(duration=10, hop=3), shard=t.shard\n).reduce(\n pw.this._pw_shard,\n pw.this._pw_window_start,\n pw.this._pw_window_end,\n min_t=pw.reducers.min(pw.this.t),\n max_t=pw.reducers.max(pw.this.t),\n count=pw.reducers.count(),\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\n_pw_shard | _pw_window_start | _pw_window_end | min_t | max_t | count\n0 | 3 | 13 | 12 | 12 | 1\n0 | 6 | 16 | 12 | 15 | 4\n0 | 9 | 19 | 12 | 17 | 6\n0 | 12 | 22 | 12 | 17 | 6\n0 | 15 | 25 | 15 | 17 | 3\n1 | 3 | 13 | 10 | 11 | 2\n1 | 6 | 16 | 10 | 11 | 2\n1 | 9 | 19 | 10 | 11 | 2\n```\n::\n::\npw.temporal.tumbling(duration, offset=None)\nAllows grouping together elements within a window of a given length tumbling\nacross ordered time-like data column starting from a given offset.\nNOTE: Usually used as an argument of .windowby().\n* Parameters\n * duration (`int` | `float` | `timedelta`) \u2013 length of the window\n * offset (`UnionType`\\[`int`, `float`, `datetime`, `None`\\]) \u2013 beginning of the first window\n* Returns\n *Window* \u2013 object to pass as an argument to .windowby()\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as 
pw\nt = pw.debug.table_from_markdown(\n'''\n | shard | t\n 1 | 0 | 12\n 2 | 0 | 13\n 3 | 0 | 14\n 4 | 0 | 15\n 5 | 0 | 16\n 6 | 0 | 17\n 7 | 1 | 12\n 8 | 1 | 13\n''')\nresult = t.windowby(\n t.t, window=pw.temporal.tumbling(duration=5), shard=t.shard\n).reduce(\n pw.this._pw_shard,\n pw.this._pw_window_start,\n pw.this._pw_window_end,\n min_t=pw.reducers.min(pw.this.t),\n max_t=pw.reducers.max(pw.this.t),\n count=pw.reducers.count(),\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\n_pw_shard | _pw_window_start | _pw_window_end | min_t | max_t | count\n0 | 10 | 15 | 12 | 14 | 3\n0 | 15 | 20 | 15 | 17 | 3\n1 | 10 | 15 | 12 | 13 | 2\n```\n::\n::\npw.temporal.window_join(self, other, self_time, other_time, window, *on, how=JoinMode.INNER)\nPerforms a window join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * how (`JoinMode`) \u2013 decides whether to run window_join_inner, window_join_left, window_join_right\n or window_join_outer. Default is INNER.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt4 = t1.window_join(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 
3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, 
right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n```\n::\n::\npw.temporal.window_join_inner(self, other, self_time, other_time, window, *on)\nPerforms a window join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt4 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n 
key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_inner(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_inner(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n```\n::\n::\npw.temporal.window_join_left(self, other, self_time, other_time, window, *on)\nPerforms a window left join of self with other using 
a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from the left side that didn\u2019t match with any record on the right side in\na given window, are returned with missing values on the right side replaced\nwith None. The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_left(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 |\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt4 = t1.window_join_left(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 |\n1 | 2\n2 | 2\n2 | 2\n3 |\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n13 |\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_left(t2, t1.t, t2.t, 
pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 |\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n1 | 13 |\n2 | 1 |\n2 | 2 | 2\n2 | 2 | 3\n3 | 4 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_left(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n10 |\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_left(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n2 | 7 |\n3 | 4 
|\n```\n::\n::\npw.temporal.window_join_outer(self, other, self_time, other_time, window, *on)\nPerforms a window outer join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from both sides that didn\u2019t match with any record on the other side in\na given window, are returned with missing values on the other side replaced\nwith None. The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n1 |\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt4 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n | 5\n | 6\n1 |\n1 | 2\n2 | 2\n2 | 2\n3 |\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n13 |\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_outer(t2, t1.t, 
t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=pw.coalesce(t1.a, t2.b), left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | | 5\n1 | 1 |\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n1 | 13 |\n2 | 1 |\n2 | 2 | 2\n2 | 2 | 3\n3 | 4 |\n4 | | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_outer(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | -3\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n10 |\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_outer(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=pw.coalesce(t1.a, t2.b), left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | | 10\n2 | 0 | 
2\n2 | 3 | 2\n2 | 4 | 2\n2 | 7 |\n3 | 4 |\n4 | | 3\n```\n::\n::\npw.temporal.window_join_right(self, other, self_time, other_time, window, *on)\nPerforms a window right join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from the right side that didn\u2019t match with any record on the left side in\na given window, are returned with missing values on the left side replaced\nwith None. The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt4 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n | 5\n | 6\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.tumbling(2), 
t1.a == t2.b).select(\n key=t2.b, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | | 5\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n4 | | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_right(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | -3\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_right(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t2.b, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | | 10\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n4 | | 3\n```\n::\n::\npw.temporal.windowby(self, time_expr, *, window, behavior=None, 
shard=None)\nCreate a GroupedTable by windowing the table (based on expr and window),\noptionally sharded with shard\n* Parameters\n * time_expr (`ColumnExpression`) \u2013 Column expression used for windowing\n * window (`Window`) \u2013 type window to use\n * shard (`Optional`\\[`ColumnExpression`\\]) \u2013 optional column expression to act as a shard key\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n'''\n | shard | t | v\n1 | 0 | 1 | 10\n2 | 0 | 2 | 1\n3 | 0 | 4 | 3\n4 | 0 | 8 | 2\n5 | 0 | 9 | 4\n6 | 0 | 10| 8\n7 | 1 | 1 | 9\n8 | 1 | 2 | 16\n''')\nresult = t.windowby(\n t.t, window=pw.temporal.session(predicate=lambda a, b: abs(a-b) <= 1), shard=t.shard\n).reduce(\npw.this.shard,\nmin_t=pw.reducers.min(pw.this.t),\nmax_v=pw.reducers.max(pw.this.v),\ncount=pw.reducers.count(),\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nshard | min_t | max_v | count\n0 | 1 | 10 | 2\n0 | 4 | 3 | 1\n0 | 8 | 8 | 3\n1 | 1 | 16 | 2\n```\n::\n::"} -{"doc": "pathway.stdlib.temporal.temporal_behavior module\nclass pw.temporal.temporal_behavior.Behavior()\nA superclass of all classes defining temporal behavior.\nclass pw.temporal.temporal_behavior.CommonBehavior(delay, cutoff, keep_results)\nDefines temporal behavior of windows and temporal joins.\nclass pw.temporal.temporal_behavior.ExactlyOnceBehavior(shift)\npw.temporal.temporal_behavior.common_behavior(delay=None, cutoff=None, keep_results=True)\nCreates CommonBehavior\n* Parameters\n * delay (`Union`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 Optional; for windows, delays initial output by `delay` with respect to the\n beginning of the window. 
Setting it to `None` does not enable\n delaying mechanism.\n For interval joins, it delays the time the record is joined by `delay`.\n Using delay is useful when updates are too frequent.\n * cutoff (`Union`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 Optional; for windows, stops updating windows which end earlier than maximal\n seen time minus `cutoff`. Setting cutoff to `None` does not enable\n cutoff mechanism.\n For interval joins, it ignores entries that are older\n than maximal seen time minus `cutoff`. This parameter is also used to clear\n memory. It allows to release memory used by entries that won\u2019t change.\n * keep_results (`bool`) \u2013 If set to True, keeps all results of the operator. If set to False,\n keeps only results that are newer than maximal seen time minus `cutoff`.\n Can\u2019t be set to `False`, when `cutoff` is `None`.\npw.temporal.temporal_behavior.exactly_once_behavior(shift=None)\nCreates an instance of class ExactlyOnceBehavior, indicating that each non empty\nwindow should produce exactly one output.\n* Parameters\n * shift (`Union`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 optional, defines the moment in time (`window end + shift`) in which\n the window stops accepting the data and sends the results to the output. 
Setting it to None is interpreted as shift=0.\nRemark:\n note that setting a non-zero shift and demanding exactly one output results in\n the output being delivered only when the time in the time column reaches\n `window end + shift`.\n"} -{"doc": "---\ntitle: pathway.stdlib.ordered package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ordered package\nFunctions\npw.ordered.diff(self, timestamp, *values)\nCompute the difference between the values in the `values` columns and the previous values\naccording to the order defined by the column `timestamp`.\n* Parameters\n * timestamp (*-*) \u2013 The column reference to the `timestamp` column on\n which the order is computed.\n * \\*values (*-*) \u2013 Variable-length argument representing the column\n references to the `values` columns.\n* Returns\n `Table` \u2013 A new table where each column is replaced with a new column containing\n the difference and whose name is the concatenation of diff_ and the former name.\n* Raises\n ValueError \u2013 If the columns are not ColumnReference.\nNOTE: * The value of the \u201cfirst\u201d value (the row with the lower value\n in the `timestamp` column) is `None`.\nExample:\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\ntimestamp | values\n1 | 1\n2 | 2\n3 | 4\n4 | 7\n5 | 11\n6 | 16\n''')\ntable += table.diff(pw.this.timestamp, pw.this.values)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ntimestamp | values | diff_values\n1 | 1 |\n2 | 2 | 1\n3 | 4 | 2\n4 | 7 | 3\n5 | 11 | 4\n6 | 16 | 5\n```\n::\n::"} -{"doc": "pathway.stdlib.ordered.diff module\npw.ordered.diff.diff(self, timestamp, *values)\nCompute the difference between the values in the `values` columns and the previous values\naccording to the order defined by the column `timestamp`.\n* Parameters\n * timestamp (*-*) \u2013 The column reference to the `timestamp` column on\n which the order is computed.\n * \\*values (*-*) \u2013 Variable-length 
argument representing the column\n references to the `values` columns.\n* Returns\n `Table` \u2013 A new table where each column is replaced with a new column containing\n the difference and whose name is the concatenation of diff_ and the former name.\n* Raises\n ValueError \u2013 If the columns are not ColumnReference.\nNOTE: * The value of the \u201cfirst\u201d value (the row with the lower value\n in the `timestamp` column) is `None`.\nExample:\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\ntimestamp | values\n1 | 1\n2 | 2\n3 | 4\n4 | 7\n5 | 11\n6 | 16\n''')\ntable += table.diff(pw.this.timestamp, pw.this.values)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ntimestamp | values | diff_values\n1 | 1 |\n2 | 2 | 1\n3 | 4 | 2\n4 | 7 | 3\n5 | 11 | 4\n6 | 16 | 5\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.io.s3 package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.s3 package\nclass pw.io.s3.AwsS3Settings(*, bucket_name=None, access_key=None, secret_access_key=None, with_path_style=False, region=None, endpoint=None)\nStores Amazon S3 connection settings. You may also use this class to store\nconfiguration settings for any custom S3 installation, however you will need to\nspecify the region and the endpoint.\n* Parameters\n * bucket_name \u2013 Name of S3 bucket.\n * access_key \u2013 Access key for the bucket.\n * secret_access_key \u2013 Secret access key for the bucket.\n * with_path_style \u2013 Whether to use path-style requests.\n * region \u2013 Region of the bucket.\n * endpoint \u2013 Custom endpoint in case of self-hosted storage.\nclassmethod new_from_path(s3_path)\nConstructs settings from S3 path. The engine will look for the credentials in\nenvironment variables and in local AWS profiles. It will also automatically\ndetect the region of the bucket.\nThis method may fail if there are no credentials or they are incorrect. 
It may\nalso fail if the bucket does not exist.\n* Parameters\n s3_path (`str`) \u2013 full path to the object in the form `s3://<bucket_name>/<path>`.\n* Returns\n Configuration object.\nclass pw.io.s3.DigitalOceanS3Settings(bucket_name, *, access_key=None, secret_access_key=None, region=None)\nStores Digital Ocean S3 connection settings.\n* Parameters\n * bucket_name \u2013 Name of Digital Ocean S3 bucket.\n * access_key \u2013 Access key for the bucket.\n * secret_access_key \u2013 Secret access key for the bucket.\n * region \u2013 Region of the bucket.\nclass pw.io.s3.WasabiS3Settings(bucket_name, *, access_key=None, secret_access_key=None, region=None)\nStores Wasabi S3 connection settings.\n* Parameters\n * bucket_name \u2013 Name of Wasabi S3 bucket.\n * access_key \u2013 Access key for the bucket.\n * secret_access_key \u2013 Secret access key for the bucket.\n * region \u2013 Region of the bucket.\nFunctions\npw.io.s3.read(path, format, *, aws_s3_settings=None, schema=None, mode='streaming', csv_settings=None, json_field_paths=None, persistent_id=None, autocommit_duration_ms=1500, debug_data=None)\nReads a table from one or several objects in Amazon S3 bucket in the given\nformat.\nIn case the prefix of S3 path is specified, and there are several objects lying\nunder this prefix, their order is determined according to their modification times:\nthe smaller the modification time is, the earlier the file will be passed to the\nengine.\n* Parameters\n * path (`str`) \u2013 Path to an object or to a folder of objects in Amazon S3 bucket.\n * aws_s3_settings (`Optional`\\[`AwsS3Settings`\\]) \u2013 Connection parameters for the S3 account and the bucket.\n * format (`str`) \u2013 Format of data to be read. 
Currently \u201ccsv\u201d, \u201cjson\u201d and \u201cplaintext\u201d\n formats are supported.\n * schema (`Optional`\\[`type`\\[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new objects under the\n given path prefix. Set it to \u201cstatic\u201d, it will only consider the available\n data and ingest all of it. Default value is \u201cstreaming\u201d.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser. This parameter is used only in case\n the specified format is \u201ccsv\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is \u201cjson\u201d, this field allows to map field names\n into path in the read json object. For the field which require such mapping,\n it should be given in the format `<field_name>: <path to be mapped>`,\n where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data (`Any`) \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\nLet\u2019s consider an object store, which is hosted in Amazon S3. The store contains\ndatasets in the respective bucket and is located in the region eu-west-3. 
The goal\nis to read the dataset, located under the path `animals/` in this bucket.\nLet\u2019s suppose that the format of the dataset rows is jsonlines.\nThen, the code may look as follows:\n```python\nimport os\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.s3.read(\n \"animals/\",\n aws_s3_settings=pw.io.s3.AwsS3Settings(\n bucket_name=\"datasets\",\n region=\"eu-west-3\",\n access_key=os.environ[\"S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"S3_SECRET_ACCESS_KEY\"],\n ),\n format=\"json\",\n schema=InputSchema,\n)\n```\nIn case you are dealing with a public bucket, the parameters `access_key` and\n`secret_access_key` can be omitted. In this case, the read part will look as\nfollows:\n```python\nt = pw.io.s3.read(\n \"animals/\",\n aws_s3_settings=pw.io.s3.AwsS3Settings(\n bucket_name=\"datasets\",\n region=\"eu-west-3\",\n ),\n format=\"json\",\n schema=InputSchema,\n)\n```\npw.io.s3.read_from_digital_ocean(path, do_s3_settings, format, *, schema=None, mode='streaming', csv_settings=None, json_field_paths=None, persistent_id=None, autocommit_duration_ms=1500, debug_data=None)\nReads a table from one or several objects in Digital Ocean S3 bucket.\nIn case the prefix of S3 path is specified, and there are several objects lying\nunder this prefix, their order is determined according to their modification times:\nthe smaller the modification time is, the earlier the file will be passed to the\nengine.\n* Parameters\n * path (`str`) \u2013 Path to an object or to a folder of objects in S3 bucket.\n * do_s3_settings (`DigitalOceanS3Settings`) \u2013 Connection parameters for the account and the bucket.\n * format (`str`) \u2013 Format of data to be read. 
Currently \u201ccsv\u201d, \u201cjson\u201d and \u201cplaintext\u201d\n formats are supported.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new objects under the\n given path prefix. Set it to \u201cstatic\u201d, it will only consider the available\n data and ingest all of it. Default value is \u201cstreaming\u201d.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser. This parameter is used only in case\n the specified format is \u201ccsv\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is \u201cjson\u201d, this field allows to map field names\n into path in the read json object. For the field which require such mapping,\n it should be given in the format `: `,\n where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data (`Any`) \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\nLet\u2019s consider an object store, which is hosted in Digital Ocean S3. 
The store\ncontains CSV datasets in the respective bucket and is located in the region ams3.\nThe goal is to read the dataset, located under the path `animals/` in this bucket.\nThen, the code may look as follows:\n```python\nimport os\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.s3.read_from_digital_ocean(\n \"animals/\",\n do_s3_settings=pw.io.s3.DigitalOceanS3Settings(\n bucket_name=\"datasets\",\n region=\"ams3\",\n access_key=os.environ[\"DO_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"DO_S3_SECRET_ACCESS_KEY\"],\n ),\n format=\"csv\",\n schema=InputSchema,\n)\n```\npw.io.s3.read_from_wasabi(path, wasabi_s3_settings, format, *, schema=None, mode='streaming', csv_settings=None, json_field_paths=None, persistent_id=None, autocommit_duration_ms=1500, debug_data=None)\nReads a table from one or several objects in Wasabi S3 bucket.\nIn case the prefix of S3 path is specified, and there are several objects lying under\nthis prefix, their order is determined according to their modification times: the\nsmaller the modification time is, the earlier the file will be passed to the engine.\n* Parameters\n * path (`str`) \u2013 Path to an object or to a folder of objects in S3 bucket.\n * wasabi_s3_settings (`WasabiS3Settings`) \u2013 Connection parameters for the account and the bucket.\n * format (`str`) \u2013 Format of data to be read. Currently \u201ccsv\u201d, \u201cjson\u201d and \u201cplaintext\u201d\n formats are supported.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new objects under the\n given path prefix. Set it to \u201cstatic\u201d, it will only consider the available\n data and ingest all of it. Default value is \u201cstreaming\u201d.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser. 
This parameter is used only in case\n the specified format is \u201ccsv\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is \u201cjson\u201d, this field allows to map field names\n into path in the read json object. For the field which require such mapping,\n it should be given in the format `: `,\n where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data (`Any`) \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\nLet\u2019s consider an object store, which is hosted in Wasabi S3. The store\ncontains CSV datasets in the respective bucket and is located in the region us-west-1.\nThe goal is to read the dataset, located under the path `animals/` in this bucket.\nThen, the code may look as follows:\n```python\nimport os\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.s3.read_from_wasabi(\n \"animals/\",\n wasabi_s3_settings=pw.io.s3.WasabiS3Settings(\n bucket_name=\"datasets\",\n region=\"us-west-1\",\n access_key=os.environ[\"WASABI_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"WASABI_S3_SECRET_ACCESS_KEY\"],\n ),\n format=\"csv\",\n schema=InputSchema,\n)\n```\n"} -{"doc": "---\ntitle: pathway.persistence package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.persistence package\nclass pw.persistence.Backend(engine_data_storage, fs_path=None)\nThe settings of a backend, which is used to persist the computation state. 
There\nare two kinds of data backends: metadata backend and snapshot backend. Both are\nconfigurable via this class.\nclassmethod filesystem(path)\nConfigure the filesystem backend.\n* Parameters\n path (`str` | `PathLike`\\[`str`\\]) \u2013 the path to the root directory in the file system, which will be used to store the persisted data.\n* Returns\n Class instance denoting the filesystem storage backend with root directory at `path`.\nclassmethod s3(root_path, bucket_settings)\nConfigure the S3 backend.\n* Parameters\n * root_path (`str`) \u2013 path to the root in the S3 storage, which will be used to store persisted data;\n * bucket_settings (`AwsS3Settings`) \u2013 the settings for S3 bucket connection in the same format as they are used by S3 connectors.\n* Returns\n Class instance denoting the S3 storage backend with root directory as\n `root_path` and connection settings given by `bucket_settings`.\nclass pw.persistence.Config(*, snapshot_interval_ms=0, metadata_storage, snapshot_storage, snapshot_access, replay_mode, continue_after_replay)\nConfigure the data persistence. 
An instance of this class should be passed as a\nparameter to pw.run in case persistence is enabled.\nPlease note that if you\u2019d like to use the same backend for both metadata and\nsnapshot storages, you can use the convenience method `simple_config`.\n* Parameters\n * metadata_storage (`Backend`) \u2013 metadata backend configuration;\n * snapshot_storage (`Backend`) \u2013 snapshots backend configuration;\n * snapshot_interval_ms (`int`) \u2013 the desired duration between snapshot updates in milliseconds;\nclassmethod simple_config(backend, snapshot_interval_ms=0, snapshot_access=, replay_mode=, continue_after_replay=True)\nConstruct config from a single instance of the `Backend` class, using this backend to persist metadata and snapshot.\n* Parameters\n * backend (`Backend`) \u2013 storage backend settings;\n * snapshot_interval_ms \u2013 the desired freshness of the persisted snapshot in milliseconds. The greater the value is, the more the amount of time that the snapshot may fall behind, and the less computational resources are required.\n* Returns\n Persistence config.\n"} -{"doc": "---\ntitle: pathway.stdlib.graphs.bellman_ford package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.graphs.bellman_ford package\nclass pw.graphs.bellman_ford.DistFromSource()\nclass pw.graphs.bellman_ford.Vertex()\n"} -{"doc": "pathway.stdlib.graphs.bellman_ford.impl module\nclass pw.graphs.bellman_ford.impl.Dist()\nclass pw.graphs.bellman_ford.impl.DistFromSource()\nclass pw.graphs.bellman_ford.impl.Vertex()\n"} -{"doc": "---\ntitle: pathway.io.kafka package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.kafka package\nFunctions\npw.io.kafka.read(rdkafka_settings, topic=None, *, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None, value_columns=None, primary_key=None, types=None, default_values=None, kwargs)\nGeneralized method to read the data from the given topic in 
Kafka.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of librdkafka.\n * topic (`UnionType`\\[`str`, `list`\\[`str`\\], `None`\\]) \u2013 Name of topic in Kafka from which the data should be read.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the field which require such mapping, it should be\n given in the format `: `, where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will\n be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. 
This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table, required for format other than\n \u201craw\u201d. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named `data`.\nFor other formats, the argument value_column is required and defines the columns.\nExample:\nConsider there is a queue in Kafka, running locally on port 9092. Our queue can\nuse SASL-SSL authentication over a SCRAM-SHA-256 mechanism. You can set up a queue\nwith similar parameters in Upstash. 
Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"sasl_ssl\",\n \"sasl.mechanism\": \"SCRAM-SHA-256\",\n \"group.id\": \"$GROUP_NAME\",\n \"session.timeout.ms\": \"60000\",\n \"sasl.username\": os.environ[\"KAFKA_USERNAME\"],\n \"sasl.password\": os.environ[\"KAFKA_PASSWORD\"]\n}\n```\nTo connect to the topic \u201canimals\u201d and accept messages, the connector must be used as follows, depending on the format:\nRaw version:\n```python\nimport pathway as pw\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"raw\",\n)\n```\nAll the data will be accessible in the column data.\nCSV version:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"csv\",\n schema=InputSchema\n)\n```\nIn case of CSV format, the first message must be the header:\n```csv\nowner,pet\n```\nThen, simple data rows are expected. 
For example:\n```csv\nAlice,cat\nBob,dog\n```\nThis way, you get a table which looks as follows:\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice cat\n Bob dog\n```\n::\n::\nJSON version:\n```python\nimport pathway as pw\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"json\",\n schema=InputSchema,\n)\n```\nFor the JSON connector, you can send these two messages:\n```json\n{\"owner\": \"Alice\", \"pet\": \"cat\"}\n{\"owner\": \"Bob\", \"pet\": \"dog\"}\n```\nThis way, you get a table which looks as follows:\n"} -{"doc": "---\ntitle: pathway.io.kafka package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.kafka package\nFunctions\npw.io.kafka.read(rdkafka_settings, topic=None, *, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None, value_columns=None, primary_key=None, types=None, default_values=None, kwargs)\nGeneralized method to read the data from the given topic in Kafka.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of librdkafka.\n * topic (`UnionType`\\[`str`, `list`\\[`str`\\], `None`\\]) \u2013 Name of topic in Kafka from which the data should be read.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the field which require such mapping, it should be\n given in the format `: `, where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will\n be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table, required for format other than\n \u201craw\u201d. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. 
This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named `data`.\nFor other formats, the argument value_column is required and defines the columns.\nExample:\nConsider there is a queue in Kafka, running locally on port 9092. Our queue can\nuse SASL-SSL authentication over a SCRAM-SHA-256 mechanism. You can set up a queue\nwith similar parameters in Upstash. Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"sasl_ssl\",\n \"sasl.mechanism\": \"SCRAM-SHA-256\",\n \"group.id\": \"$GROUP_NAME\",\n \"session.timeout.ms\": \"60000\",\n \"sasl.username\": os.environ[\"KAFKA_USERNAME\"],\n \"sasl.password\": os.environ[\"KAFKA_PASSWORD\"]\n}\n```\nTo connect to the topic \u201canimals\u201d and accept messages, the connector must be used as follows, depending on the format:\nRaw version:\n```python\nimport pathway as pw\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"raw\",\n)\n```\nAll the data will be accessible in the column data.\nCSV version:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"csv\",\n schema=InputSchema\n)\n```\nIn case of CSV format, the first message must be the 
header:\n```csv\nowner,pet\n```\nThen, simple data rows are expected. For example:\n```csv\nAlice,cat\nBob,dog\n```\nThis way, you get a table which looks as follows:\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice cat\n Bob dog\n```\n::\n::\nNow consider that the data about pets come in a more sophisticated way. For instance\nyou have an owner, kind and name of an animal, along with some physical measurements.\nThe JSON payload in this case may look as follows:\n```json\n{\n \"name\": \"Jack\",\n \"pet\": {\n \"animal\": \"cat\",\n \"name\": \"Bob\",\n \"measurements\": [100, 200, 300]\n }\n}\n```\nSuppose you need to extract a name of the pet and the height, which is the 2nd\n(1-based) or the 1st (0-based) element in the array of measurements. Then, you\nuse JSON Pointer and do a connector, which gets the data as follows:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n pet_name: str\n pet_height: int\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"json\",\n schema=InputSchema,\n json_field_paths={\n \"pet_name\": \"/pet/name\",\n \"pet_height\": \"/pet/measurements/1\"\n },\n)\n```\npw.io.kafka.read_from_upstash(endpoint, username, password, topic, *, read_only_new=False, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None)\nSimplified method to read data from Kafka instance hosted in Upstash. 
It requires\nendpoint address and topic along with credentials.\nRead starts from the beginning of the topic, unless the read_only_new parameter is\nset to True.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * endpoint (`str`) \u2013 Upstash endpoint for the sought queue, which can be found on \u201cDetails\u201d page.\n * username (`str`) \u2013 Username generated for this queue.\n * password (`str`) \u2013 Password generated for this queue. These credentials are also available on \u201cDetails\u201d page.\n * topic (`str`) \u2013 Name of topic in Kafka from which the data should be read.\n * read_only_new (`bool`) \u2013 If set to True only the entries which appear after the start of the program will be read. Otherwise, the read will be done from the beginning of the topic.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 The maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the fields which require such mapping, it should be\n given in the format `<field_name>: <path to be mapped>`, where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. 
This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will\n be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named `data`.\nExample:\nConsider that there is a queue running in Upstash. Let\u2019s say the endpoint name is\n\u201chttps://example-endpoint.com:19092\u201d, topic is \u201ctest-topic\u201d and the credentials are\nstored in environment variables.\nSuppose that we need just to read the raw messages for the further processing. Then\nit can be done in the following way:\n```python\nimport os\nimport pathway as pw\nt = pw.io.kafka.read_from_upstash(\n endpoint=\"https://example-endpoint.com:19092\",\n topic=\"test-topic\",\n username=os.environ[\"KAFKA_USERNAME\"],\n password=os.environ[\"KAFKA_PASSWORD\"],\n)\n```\npw.io.kafka.simple_read(server, topic, *, read_only_new=False, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None)\nSimplified method to read data from Kafka. Only requires the server address and\nthe topic name. 
If you have any kind of authentication or require fine-tuning of the\nparameters, please use read method.\nRead starts from the beginning of the topic, unless the read_only_new parameter is\nset to True.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * server (`str`) \u2013 Address of the server.\n * topic (`str`) \u2013 Name of topic in Kafka from which the data should be read.\n * read_only_new (`bool`) \u2013 If set to True only the entries which appear after the start of the program will be read. Otherwise, the read will be done from the beginning of the topic.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 The maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the fields which require such mapping, it should be\n given in the format `<field_name>: <path to be mapped>`, where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. 
This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will\n be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named `data`.\nFor other formats, the argument value_column is required and defines the columns.\nExample:\nConsider that there\u2019s a Kafka queue running locally on the port 9092 and we need\nto read raw messages from the topic \u201ctest-topic\u201d. Then, it can be done in the\nfollowing way:\n```python\nimport pathway as pw\nt = pw.io.kafka.simple_read(\"localhost:9092\", \"test-topic\")\n```\npw.io.kafka.write(table, rdkafka_settings, topic_name, *, format='json', delimiter=',', kwargs)\nWrite a table to a given topic on a Kafka instance.\n* Parameters\n * table (`Table`) \u2013 the table to output.\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of\n librdkafka.\n * topic_name (`str`) \u2013 name of topic in Kafka to which the data should be sent.\n * format (`str`) \u2013 format of the input data, currently \u201cjson\u201d and \u201cdsv\u201d are supported.\n * delimiter (`str`) \u2013 field delimiter to be used in case of delimiter-separated values\n format.\n* Returns\n None\nLimitations:\nFor future proofing, the format is configurable, but (for now) only JSON is available.\nExample:\nConsider there is a queue in Kafka, running locally on port 9092. 
Our queue can\nuse SASL-SSL authentication over a SCRAM-SHA-256 mechanism. You can set up a queue\nwith similar parameters in Upstash. Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"sasl_ssl\",\n \"sasl.mechanism\": \"SCRAM-SHA-256\",\n \"sasl.username\": os.environ[\"KAFKA_USERNAME\"],\n \"sasl.password\": os.environ[\"KAFKA_PASSWORD\"]\n}\n```\nYou want to send a Pathway table t to the Kafka instance.\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice dog \\n 2 9 Bob cat \\n 3 8 Alice cat\")\n```\nTo connect to the topic \u201canimals\u201d and send messages, the connector must be used as follows, depending on the format:\nJSON version:\n```python\npw.io.kafka.write(\n t,\n rdkafka_settings,\n \"animals\",\n format=\"json\",\n)\n```\nAll the updates of table t will be sent to the Kafka instance.\n"} -{"doc": "---\ntitle: pathway.io.python package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.python package\nclass pw.io.python.ConnectorSubject()\nAn abstract class allowing to create custom python connectors.\nCustom python connector can be created by extending this class and implementing\n`run()` function responsible for filling the buffer with data.\nThis function will be started by pathway engine in a separate thread.\nIn order to send a message one of the methods\n`next_json()`, `next_str()`, `next_bytes()` can be used.\nclose()\nSends a sentinel message.\nShould be called to indicate that no new messages will be sent.\ncommit()\nSends a commit message.\nnext_bytes(message)\nSends a message.\n* Parameters\n message (`bytes`) \u2013 bytes encoded json string.\nnext_json(message)\nSends a message.\n* Parameters\n message (`dict`) \u2013 Dict representing json.\nnext_str(message)\nSends a message.\n* Parameters\n message (`str`) \u2013 json string.\non_stop()\nCalled after the end of the `run()` 
function.\nstart()\nRuns a separate thread with function feeding data into buffer.\nShould not be called directly.\nFunctions\npw.io.python.read(subject, *, schema=None, format='json', autocommit_duration_ms=1500, debug_data=None, value_columns=None, primary_key=None, types=None, default_values=None, persistent_id=None)\nReads a table from a ConnectorSubject.\n* Parameters\n * subject (`ConnectorSubject`) \u2013 An instance of a `ConnectorSubject`.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format (`str`) \u2013 Format of the data produced by a subject, \u201cjson\u201d, \u201craw\u201d or \u201cbinary\u201d. In case of\n a \u201craw\u201d format, table with single \u201cdata\u201d column will be produced.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`int`) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. 
The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will be persisted or `None`, if there is no need to persist the state of this table. When a program restarts, it restores the state for all input tables according to what was saved for their `persistent_id`. This way it\u2019s possible to configure the start of computations from the moment they were terminated last time.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nisinstance(t1, pw.Table)\n```\n::\nResult\n```\nTrue\n```\n::\n::\nproperty C(: ColumnNamespace )\nReturns the namespace of all the columns of a joinable.\nAllows accessing column names that might otherwise be a reserved methods.\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\ntab = pw.debug.table_from_markdown('''\nage | owner | pet | filter\n10 | Alice | dog | True\n9 | Bob | dog | True\n8 | Alice | cat | False\n7 | Bob | dog | True\n''')\nisinstance(tab.C.age, pw.ColumnReference)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Table 
API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\npw.debug.compute_and_print(tab.filter(tab.C.filter), include_id=False)\n```\n::\nResult\n```\nage | owner | pet | filter\n7 | Bob | dog | True\n9 | Bob | dog | True\n10 | Alice | dog | True\n```\n::\n::\nasof_join(other, self_time, other_time, *on, how, defaults={}, direction=Direction.BACKWARD)\nPerform an ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * how (`JoinMode`) \u2013 mode of the join (LEFT, RIGHT, FULL)\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. 
If no\n default is provided, None will be used.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n how=pw.JoinMode.LEFT,\n defaults={t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 6 | 11\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n```\n::\n::\nasof_join_left(other, self_time, other_time, *on, defaults={}, direction=Direction.BACKWARD)\nPerform a left ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_left(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 6 | 11\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n```\n::\n::\nasof_join_outer(other, self_time, other_time, *on, defaults={}, direction=Direction.BACKWARD)\nPerform an outer ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 
Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_outer(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t1.val: -1, t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 2 | 1 | 0 | 1\n0 | 3 | 1 | 
6 | 7\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 2 | 7\n0 | 7 | 5 | 6 | 11\n0 | 8 | 5 | 3 | 8\n0 | 9 | 5 | 9 | 14\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n0 | 13 | 7 | 7 | 14\n0 | 14 | 7 | 4 | 11\n1 | 2 | -1 | 7 | 6\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n1 | 8 | 9 | 3 | 12\n```\n::\n::\nasof_join_right(other, self_time, other_time, *on, defaults={}, direction=Direction.BACKWARD)\nPerform a right ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. 
If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_right(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t1.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 2 | 1 | 0 | 1\n0 | 3 | 1 | 6 | 7\n0 | 7 | 5 | 2 | 7\n0 | 8 | 5 | 3 | 8\n0 | 9 | 5 | 9 | 14\n0 | 13 | 7 | 7 | 14\n0 | 14 | 7 | 4 | 11\n1 | 2 | -1 | 7 | 6\n1 | 8 | 9 | 3 | 12\n```\n::\n::\nasof_now_join(other, *on, how=JoinMode.INNER, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. Rows from self are not stored.\nThey are joined with rows of other at their processing time. If other is updated\nin the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. 
They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n * how (`JoinMode`) \u2013 by default, inner join is performed. Possible values are JoinMode.{INNER,LEFT}\n which correspond to inner and left join respectively.\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nasof_now_join_inner(other, *on, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. Rows from self are not stored.\nThey are joined with rows of other at their processing time. If other is updated\nin the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nasof_now_join_left(other, *on, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. If there are no matching\nrows in other, missing values on the right side are replaced with None.\nRows from self are not stored. 
They are joined with rows of other at their processing\ntime. If other is updated in the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\ncast_to_types(kwargs)\nCasts columns to types.\nconcat(*others)\nConcats self with every other \u220a others.\nSemantics:\n- result.columns == self.columns == other.columns\n- result.id == self.id \u222a other.id\nif self.id and other.id collide, throws an exception.\nRequires:\n- other.columns == self.columns\n- self.id disjoint with other.id\n* Parameters\n other \u2013 the other table.\n* Returns\n *Table* \u2013 The concatenated table. 
Id\u2019s of rows from original tables are preserved.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n11 | 11 | Alice | 30\n12 | 12 | Tom | 40\n''')\npw.universes.promise_are_pairwise_disjoint(t1, t2)\nt3 = t1.concat(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n11 | Alice | 30\n12 | Tom | 40\n```\n::\n::\nconcat_reindex(*tables)\nConcatenate contents of several tables.\nThis is similar to PySpark union. All tables must have the same schema. Each row is reindexed.\n* Parameters\n tables (`Table`) \u2013 List of tables to concatenate. All tables must have the same schema.\n* Returns\n *Table* \u2013 The concatenated table. 
It will have new, synthetic ids.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | pet\n1 | Dog\n7 | Cat\n''')\nt2 = pw.debug.table_from_markdown('''\n | pet\n1 | Manul\n8 | Octopus\n''')\nt3 = t1.concat_reindex(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\npet\nCat\nDog\nManul\nOctopus\n```\n::\n::\ncopy()\nReturns a copy of a table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.copy()\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n7 | Bob | dog\n8 | Alice | cat\n9 | Bob | dog\n10 | Alice | dog\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 is t2\n```\n::\nResult\n```\nFalse\n```\n::\n::\ndiff(timestamp, *values)\nCompute the difference between the values in the `values` columns and the previous values\naccording to the order defined by the column 
`timestamp`.\n* Parameters\n * timestamp (*-*) \u2013 The column reference to the `timestamp` column on\n which the order is computed.\n * \\*values (*-*) \u2013 Variable-length argument representing the column\n references to the `values` columns.\n* Returns\n `Table` \u2013 A new table where each column is replaced with a new column containing\n the difference and whose name is the concatenation of diff_ and the former name.\n* Raises\n ValueError \u2013 If the columns are not ColumnReference.\nNOTE: * The value of the \u201cfirst\u201d value (the row with the lower value\n in the `timestamp` column) is `None`.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\ntimestamp | values\n1 | 1\n2 | 2\n3 | 4\n4 | 7\n5 | 11\n6 | 16\n''')\ntable += table.diff(pw.this.timestamp, pw.this.values)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ntimestamp | values | diff_values\n1 | 1 |\n2 | 2 | 1\n3 | 4 | 2\n4 | 7 | 3\n5 | 11 | 4\n6 | 16 | 5\n```\n::\n::\ndifference(other)\nRestrict self universe to keys not appearing in the other table.\n* Parameters\n other (`Table`) \u2013 table with ids to remove from self.\n* Returns\n *Table* \u2013 table with restricted universe, with the same set of columns\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = 
pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | cost\n2 | 100\n3 | 200\n4 | 300\n''')\nt3 = t1.difference(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n10 | Alice | 1\n```\n::\n::\nempty()\nCreates an empty table with a schema specified by kwargs.\n* Parameters\n kwargs (`DType`) \u2013 Dict whose keys are column names and values are column types.\n* Returns\n *Table* \u2013 Created empty table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.Table.empty(age=float, pet=float)\npw.debug.compute_and_print(t1, include_id=False)\n```\n::\nResult\n```\nage | pet\n```\n::\n::\nfilter(filter_expression)\nFilter a table according to filter condition.\n* Parameters\n filter \u2013 ColumnExpression that specifies the filtering condition.\n* Returns\n *Table* \u2013 Result has the same schema as self and its ids are subset of self.id.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nvertices = pw.debug.table_from_markdown('''\nlabel outdegree\n 1 3\n 7 0\n''')\nfiltered = vertices.filter(vertices.outdegree == 0)\npw.debug.compute_and_print(filtered, include_id=False)\n```\n::\nResult\n```\nlabel | outdegree\n7 | 0\n```\n::\n::\nflatten(*args, 
kwargs)\nPerforms a flatmap operation on a column or expression given as a first\nargument. Datatype of this column or expression has to be iterable.\nOther columns specified in the method arguments are duplicated\nas many times as the length of the iterable.\nIt is possible to get ids of source rows by using table.id column, e.g.\ntable.flatten(table.column_to_be_flattened, original_id = table.id).\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | pet | age\n1 | Dog | 2\n7 | Cat | 5\n''')\nt2 = t1.flatten(t1.pet)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\npet\nC\nD\na\ng\no\nt\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt3 = t1.flatten(t1.pet, t1.age)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\npet | age\nC | 5\nD | 2\na | 5\ng | 2\no | 2\nt | 5\n```\n::\n::\nfrom_columns(kwargs)\nBuild a table from columns.\nAll columns must have the same ids. 
Columns\u2019 names must be pairwise distinct.\n* Parameters\n * args (`ColumnReference`) \u2013 List of columns.\n * kwargs (`ColumnReference`) \u2013 Columns with their new names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.Table.empty(age=float, pet=float)\nt2 = pw.Table.empty(foo=float, bar=float).with_universe_of(t1)\nt3 = pw.Table.from_columns(t1.pet, qux=t2.foo)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\npet | qux\n```\n::\n::\ngroupby(*args, id=None, sort_by=None, _filter_out_results_of_forgetting=False)\nGroups table by columns from args.\nNOTE: Usually followed by .reduce() that aggregates the result and returns a table.\n* Parameters\n * args (`ColumnReference`) \u2013 columns to group by.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 if provided, is the column used to set id\u2019s of the rows of the result\n* Returns\n *GroupedTable* \u2013 Groupby object.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.groupby(t1.pet, t1.owner).reduce(t1.owner, t1.pet, ageagg=pw.reducers.sum(t1.age))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | pet | 
ageagg\nAlice | cat | 8\nAlice | dog | 10\nBob | dog | 16\n```\n::\n::\nhaving(*indexers)\nRemoves rows so that indexed.ix(indexer) is possible when some rows are missing,\nfor each indexer in indexers\nproperty id(: ColumnReference )\nGet reference to pseudocolumn containing id\u2019s of a table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.select(ids = t1.id)\nt2.typehints()['ids']\n```\n::\nResult\n```\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\npw.debug.compute_and_print(t2.select(test=t2.id == t2.ids), include_id=False)\n```\n::\nResult\n```\ntest\nTrue\nTrue\nTrue\nTrue\n```\n::\n::\ninterpolate(timestamp, *values, mode=InterpolateMode.LINEAR)\nInterpolates missing values in a column using the previous and next values based on a timestamps column.\n* Parameters\n * timestamp (*ColumnReference*) \u2013 Reference to the column containing timestamps.\n * \\*values (*ColumnReference*) \u2013 References to the columns containing values to be interpolated.\n * mode (*InterpolateMode, optional*) \u2013 The interpolation mode. Currently, only InterpolateMode.LINEAR is supported. 
Default is InterpolateMode.LINEAR.\n* Returns\n *Table* \u2013 A new table with the interpolated values.\n* Raises\n ValueError \u2013 If the columns are not ColumnReference or if the interpolation mode is not supported.\nNOTE: * The interpolation is performed based on linear interpolation between the previous and next values.\n* If a value is missing at the beginning or end of the column, no interpolation is performed.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\ntimestamp | values_a | values_b\n1 | 1 | 10\n2 | |\n3 | 3 |\n4 | |\n5 | |\n6 | 6 | 60\n''')\ntable = table.interpolate(pw.this.timestamp, pw.this.values_a, pw.this.values_b)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ntimestamp | values_a | values_b\n1 | 1 | 10\n2 | 2.0 | 20.0\n3 | 3 | 30.0\n4 | 4.0 | 40.0\n5 | 5.0 | 50.0\n6 | 6 | 60\n```\n::\n::\nintersect(*tables)\nRestrict self universe to keys appearing in all of the tables.\n* Parameters\n tables (`Table`) \u2013 tables keys of which are used to restrict universe.\n* Returns\n *Table* \u2013 table with restricted universe, with the same set of columns\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 
= pw.debug.table_from_markdown('''\n | cost\n2 | 100\n3 | 200\n4 | 300\n''')\nt3 = t1.intersect(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n```\n::\n::\ninterval_join(other, self_time, other_time, interval, *on, behavior=None, how=JoinMode.INNER)\nPerforms an interval join of self with other using a time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n * how (`JoinMode`) \u2013 decides whether to run interval_join_inner, interval_join_left, interval_join_right\n or interval_join_outer. Default is INNER.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b, how=pw.JoinMode.INNER\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\ninterval_join_inner(other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval join of self with other using a 
time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_inner(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around 
work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_inner(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\ninterval_join_left(other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval left join of self with other using a time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows from the left\nside that haven\u2019t been matched with the right side are returned with missing\nvalues on the right side replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_left(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n11 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_left(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, 
right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n1 | 11 |\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n3 | 4 |\n```\n::\n::\ninterval_join_outer(other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval outer join of self with other using a time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows that haven\u2019t\nbeen matched with the other side are returned with missing values on the other\nside replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_outer(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 0\n | 7\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n11 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_outer(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n | | 0\n | | 2\n | | 7\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n1 | 11 |\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n3 | 4 |\n```\n::\n::\ninterval_join_right(other, self_time, other_time, interval, *on, 
behavior=None)\nPerforms an interval right join of self with other using a time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows from the right\nside that haven\u2019t been matched with the left side are returned with missing\nvalues on the left side replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_right(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 0\n | 7\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_right(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n | | 0\n | | 2\n | | 7\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\nix(expression, *, optional=False, context=None)\nReindexes the table using expression values as keys. 
Uses keys from context, or tries to infer\nproper context from the expression.\nIf optional is True, then None in expression values result in None values in the result columns.\nMissing values in table keys result in RuntimeError.\nContext can be anything that allows for select or reduce, or pathway.this construct\n(latter results in returning a delayed operation, and should be only used when using ix inside\njoin().select() or groupby().reduce() sequence).\n* Returns\n Reindexed table with the same set of columns.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt_animals = pw.debug.table_from_markdown('''\n | epithet | genus\n1 | upupa | epops\n2 | acherontia | atropos\n3 | bubo | scandiacus\n4 | dynastes | hercules\n''')\nt_birds = pw.debug.table_from_markdown('''\n | desc\n2 | hoopoe\n4 | owl\n''')\nret = t_birds.select(t_birds.desc, latin=t_animals.ix(t_birds.id).genus)\npw.debug.compute_and_print(ret, include_id=False)\n```\n::\nResult\n```\ndesc | latin\nhoopoe | atropos\nowl | hercules\n```\n::\n::\nix_ref(*args, optional=False, context=None)\nReindexes the table using expressions as primary keys.\nUses keys from context, or tries to infer proper context from the expression.\nIf optional is True, then None in expression values result in None values in the result columns.\nMissing values in table keys result in RuntimeError.\nContext can be anything that allows for select or reduce, or pathway.this construct\n(latter results in returning a delayed operation, and should be only used when using ix inside\njoin().select() or groupby().reduce() sequence).\n* Parameters\n args (`Union`\\[`ColumnExpression`, `None`, `int`, `float`, `str`, 
`bytes`, `bool`, `Pointer`, `datetime`, `timedelta`, `ndarray`, `Json`, `dict`\\[`str`, `Any`\\], `tuple`\\[`Any`, `...`\\]\\]) \u2013 Column references.\n* Returns\n *Row* \u2013 indexed row.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nname | pet\nAlice | dog\nBob | cat\nCarole | cat\nDavid | dog\n''')\nt2 = t1.with_id_from(pw.this.name)\nt2 = t2.select(*pw.this, new_value=pw.this.ix_ref(\"Alice\").pet)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nname | pet | new_value\nAlice | dog | dog\nBob | cat | dog\nCarole | cat | dog\nDavid | dog | dog\n```\n::\n::\nTables obtained by a groupby/reduce scheme always have primary keys:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nname | pet\nAlice | dog\nBob | cat\nCarole | cat\nDavid | cat\n''')\nt2 = t1.groupby(pw.this.pet).reduce(pw.this.pet, count=pw.reducers.count())\nt3 = t1.select(*pw.this, new_value=t2.ix_ref(t1.pet).count)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nname | pet | new_value\nAlice | dog | 1\nBob | cat | 3\nCarole | cat | 3\nDavid | cat | 3\n```\n::\n::\nSingle-row tables can be accessed via ix_ref():\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming 
framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nname | pet\nAlice | dog\nBob | cat\nCarole | cat\nDavid | cat\n''')\nt2 = t1.reduce(count=pw.reducers.count())\nt3 = t1.select(*pw.this, new_value=t2.ix_ref(context=t1).count)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nname | pet | new_value\nAlice | dog | 4\nBob | cat | 4\nCarole | cat | 4\nDavid | cat | 4\n```\n::\n::\njoin(other, *on, id=None, how=JoinMode.INNER)\nJoin self with other using the given join expression.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n * how (`JoinMode`) \u2013 by default, inner join is performed. 
Possible values are JoinMode.{INNER,LEFT,RIGHT,OUTER}\n correspond to inner, left, right and outer join respectively.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(\n t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER\n).select(age=t1.age, owner_name=t2.owner, size=t2.size)\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_inner(other, *on, id=None)\nInner-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER).select(age=t1.age, owner_name=t2.owner, size=t2.size) # noqa: E501\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_left(other, *on, id=None)\nLeft-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks:\nargs cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on the right are replaced with None\n- rows from the right side that were not matched with the left side are skipped\n- for 
rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_left(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_outer(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on the right are replaced with None\n- for rows from the right side that were not matched with the left side,\nmissing values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called 
to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_outer(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t1.id, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_right(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- rows from the left side that were not matched with the right side are skipped\n- for rows from the right side that were not matched with the left side,\nmissing values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway 
programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_right(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(pw.coalesce(t1.b,0) + t2.d,t1.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n```\n::\n::\n* Returns\n OuterJoinResult object\npointer_from(*args, optional=False)\nPseudo-random hash of its argument. Produces pointer types. Applied column-wise.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob dog''')\ng = t1.groupby(t1.owner).reduce(refcol = t1.pointer_from(t1.owner)) # g.id == g.refcol\npw.debug.compute_and_print(g.select(test = (g.id == g.refcol)), include_id=False)\n```\n::\nResult\n```\ntest\nTrue\nTrue\n```\n::\n::\npromise_universe_is_equal_to(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the others.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n None\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: 
true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nimport pytest\nt1 = pw.debug.table_from_markdown(\n '''\n | age | owner | pet\n1 | 8 | Alice | cat\n2 | 9 | Bob | dog\n3 | 15 | Alice | tortoise\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | age | owner\n1 | 11 | Alice\n2 | 12 | Tom\n3 | 7 | Eve\n'''\n)\nt3 = t2.filter(pw.this.age > 10)\nwith pytest.raises(\n ValueError,\n match='Universe of the argument of Table.update_cells\\(\\) needs ' # noqa\n + 'to be a subset of the universe of the updated table.',\n):\n t1.update_cells(t3)\nt1 = t1.promise_universe_is_equal_to(t2)\nresult = t1.update_cells(t3)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n11 | Alice | cat\n12 | Tom | dog\n15 | Alice | tortoise\n```\n::\n::\npromise_universe_is_subset_of(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the other.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 30\n''').promise_universe_is_subset_of(t1)\nt3 = t1 << t2\npw.debug.compute_and_print(t3, 
include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 30\n```\n::\n::\npromise_universes_are_disjoint(other)\nAsserts to Pathway that an universe of self is disjoint from universe of other.\nSemantics: Used in situations where Pathway cannot deduce universes are disjoint.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n11 | 11 | Alice | 30\n12 | 12 | Tom | 40\n''').promise_universes_are_disjoint(t1)\nt3 = t1.concat(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n11 | Alice | 30\n12 | Tom | 40\n```\n::\n::\nreduce(*args, kwargs)\nReduce a table to a single row.\nEquivalent to self.groupby().reduce(\\*args, \\*\\*kwargs).\n* Parameters\n * args (`ColumnReference`) \u2013 reducer to reduce the table with\n * kwargs (`ColumnExpression`) \u2013 reducer to reduce the table with. 
Its key is the new name of a column.\n* Returns\n *Table* \u2013 Reduced table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.reduce(ageagg=pw.reducers.argmin(t1.age))\npw.debug.compute_and_print(t2, include_id=False) \n```\n::\nResult\n```\nageagg\n^...\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt3 = t2.select(t1.ix(t2.ageagg).age, t1.ix(t2.ageagg).pet)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | pet\n7 | dog\n```\n::\n::\nrename(names_mapping=None, kwargs)\nRename columns according either a dictionary or kwargs.\nIf a mapping is provided using a dictionary, `rename_by_dict` will be used.\nOtherwise, `rename_columns` will be used with kwargs.\nColumns not in keys(kwargs) are not changed. New name of a column must not be `id`.\n* Parameters\n * names_mapping (`Optional`\\`dict`\\[`str` | [`ColumnReference`, `str`\\]\\]) \u2013 mapping from old column names to new names.\n * kwargs (`ColumnExpression`) \u2013 mapping from old column names to new names.\n* Returns\n *Table* \u2013 self with columns renamed.\nrename_by_dict(names_mapping)\nRename columns according to a dictionary.\nColumns not in keys(kwargs) are not changed. 
New name of a column must not be id.\n* Parameters\n names_mapping (`dict`\\`str` | [`ColumnReference`, `str`\\]) \u2013 mapping from old column names to new names.\n* Returns\n *Table* \u2013 self with columns renamed.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | 1\n9 | Bob | 1\n8 | Alice | 2\n''')\nt2 = t1.rename_by_dict({\"age\": \"years_old\", t1.pet: \"animal\"})\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | years_old | animal\nAlice | 8 | 2\nAlice | 10 | 1\nBob | 9 | 1\n```\n::\n::\nrename_columns(kwargs)\nRename columns according to kwargs.\nColumns not in keys(kwargs) are not changed. 
New name of a column must not be id.\n* Parameters\n kwargs (`str` | `ColumnReference`) \u2013 mapping from old column names to new names.\n* Returns\n *Table* \u2013 self with columns renamed.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | 1\n9 | Bob | 1\n8 | Alice | 2\n''')\nt2 = t1.rename_columns(years_old=t1.age, animal=t1.pet)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | years_old | animal\nAlice | 8 | 2\nAlice | 10 | 1\nBob | 9 | 1\n```\n::\n::\nrestrict(other)\nRestrict self universe to keys appearing in other.\n* Parameters\n other (`TableLike`) \u2013 table which universe is used to restrict universe of self.\n* Returns\n *Table* \u2013 table with restricted universe, with the same set of columns\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | cost\n2 | 100\n3 | 200\n'''\n)\nt2.promise_universe_is_subset_of(t1)\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page 
contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt3 = t1.restrict(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n```\n::\n::\nproperty schema(: type[pathway.internals.schema.Schema] )\nGet schema of the table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt1.schema\n```\n::\nResult\n```\n, 'owner': , 'pet': }>\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1.typehints()['age']\n```\n::\nResult\n```\n```\n::\n::\nselect(*args, kwargs)\nBuild a new table with columns specified by kwargs.\nOutput columns\u2019 names are keys(kwargs). values(kwargs) can be raw values, boxed\nvalues, columns. 
Assigning to id reindexes the table.\n* Parameters\n * args (`ColumnReference`) \u2013 Column references.\n * kwargs (`Any`) \u2013 Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\npet\nDog\nCat\n''')\nt2 = t1.select(animal=t1.pet, desc=\"fluffy\")\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nanimal | desc\nCat | fluffy\nDog | fluffy\n```\n::\n::\nproperty slice(: TableSlice )\nCreates a collection of references to self columns.\nSupports basic column manipulation methods.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt1.slice.without(\"age\")\n```\n::\nResult\n```\nTableSlice({'owner': .owner, 'pet': .pet})\n```\n::\n::\nsort(key, instance=None)\nSorts a table by the specified keys.\n* Parameters\n * table \u2013 pw.Table\n The table to be sorted.\n * key (`ColumnExpression`) \u2013 ColumnReference\n An expression to sort by.\n * instance (`Optional`\\[`ColumnExpression`\\]) \u2013 ColumnReference or None\n An expression with instance. 
Rows are sorted within an instance.\n `prev` and `next` columns will only point to rows that have the same instance.\n* Returns\n *pw.Table* \u2013 The sorted table. Contains two columns: `prev` and `next`, containing the pointers\n to the previous and next rows.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\nname | age | score\nAlice | 25 | 80\nBob | 20 | 90\nCharlie | 30 | 80\n''')\ntable = table.with_id_from(pw.this.name)\ntable += table.sort(key=pw.this.age)\npw.debug.compute_and_print(table, include_id=True)\n```\n::\nResult\n```\n | name | age | score | prev | next\n^GBSDEEW... | Alice | 25 | 80 | ^EDPSSB1... | ^DS9AT95...\n^EDPSSB1... | Bob | 20 | 90 | | ^GBSDEEW...\n^DS9AT95... | Charlie | 30 | 80 | ^GBSDEEW... |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\ntable = pw.debug.table_from_markdown('''\nname | age | score\nAlice | 25 | 80\nBob | 20 | 90\nCharlie | 30 | 80\nDavid | 35 | 90\nEve | 15 | 80\n''')\ntable = table.with_id_from(pw.this.name)\ntable += table.sort(key=pw.this.age, instance=pw.this.score)\npw.debug.compute_and_print(table, include_id=True)\n```\n::\nResult\n```\n | name | age | score | prev | next\n^GBSDEEW... | Alice | 25 | 80 | ^T0B95XH... | ^DS9AT95...\n^EDPSSB1... | Bob | 20 | 90 | | ^RT0AZWX...\n^DS9AT95... | Charlie | 30 | 80 | ^GBSDEEW... |\n^RT0AZWX... 
| David | 35 | 90 | ^EDPSSB1... |\n^T0B95XH... | Eve | 15 | 80 | | ^GBSDEEW...\n```\n::\n::\ntypehints()\nReturn the types of the columns as a dictionary.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt1.typehints()\n```\n::\nResult\n```\nmappingproxy({'age': , 'owner': , 'pet': })\n```\n::\n::\nupdate_cells(other)\nUpdates cells of self, breaking ties in favor of the values in other.\nSemantics:\n * result.columns == self.columns\n * result.id == self.id\n * conflicts are resolved preferring other\u2019s values\nRequires:\n * other.columns \u2286 self.columns\n * other.id \u2286 self.id\n* Parameters\n other (`Table`) \u2013 the other table.\n* Returns\n *Table* \u2013 self updated with cells form other.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n age | owner | pet\n1 | 10 | Alice | 30\n''')\npw.universes.promise_is_subset_of(t2, t1)\nt3 = t1.update_cells(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 
30\n```\n::\n::\nupdate_rows(other)\nUpdates rows of self, breaking ties in favor for the rows in other.\nSemantics:\n- result.columns == self.columns == other.columns\n- result.id == self.id \u222a other.id\nRequires:\n- other.columns == self.columns\n* Parameters\n other (`Table`\\`TypeVar`(`TSchema`, bound= [`Schema`)\\]) \u2013 the other table.\n* Returns\n *Table* \u2013 self updated with rows form other.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 30\n12 | 12 | Tom | 40\n''')\nt3 = t1.update_rows(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 30\n12 | Tom | 40\n```\n::\n::\nupdate_types(kwargs)\nUpdates types in schema. Has no effect on the runtime.\nwindow_join(other, self_time, other_time, window, *on, how=JoinMode.INNER)\nPerforms a window join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. 
Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * how (`JoinMode`) \u2013 decides whether to run window_join_inner, window_join_left, window_join_right\n or window_join_outer. Default is INNER.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with 
data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt4 = t1.window_join(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join(\n t2, t1.t, t2.t, 
pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n```\n::\n::\nwindow_join_inner(other, self_time, other_time, window, *on)\nPerforms a window join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. 
Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named 
columns over identical universes.\nExample:\n```python\nt4 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_inner(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, 
include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_inner(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n```\n::\n::\nwindow_join_left(other, self_time, other_time, window, *on)\nPerforms a window left join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. 
Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from the left side that didn\u2019t match with any record on the right side in\na given window, are returned with missing values on the right side replaced\nwith None. The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_left(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 |\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt4 = t1.window_join_left(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 |\n1 | 2\n2 | 2\n2 | 2\n3 |\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n13 |\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 
13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_left(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 |\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n1 | 13 |\n2 | 1 |\n2 | 2 | 2\n2 | 2 | 3\n3 | 4 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_left(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n10 |\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 
| 4 | 3\n'''\n)\nt3 = t1.window_join_left(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n2 | 7 |\n3 | 4 |\n```\n::\n::\nwindow_join_outer(other, self_time, other_time, window, *on)\nPerforms a window outer join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from both sides that didn\u2019t match with any record on the other side in\na given window, are returned with missing values on the other side replaced\nwith None. The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n1 |\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt4 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n | 5\n | 6\n1 |\n1 | 2\n2 | 2\n2 | 2\n3 |\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n13 |\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, 
context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=pw.coalesce(t1.a, t2.b), left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | | 5\n1 | 1 |\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n1 | 13 |\n2 | 1 |\n2 | 2 | 2\n2 | 2 | 3\n3 | 4 |\n4 | | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_outer(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | -3\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n10 |\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = 
pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_outer(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=pw.coalesce(t1.a, t2.b), left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | | 10\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n2 | 7 |\n3 | 4 |\n4 | | 3\n```\n::\n::\nwindow_join_right(other, self_time, other_time, window, *on)\nPerforms a window right join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from the right side that didn\u2019t match with any record on the left side in\na given window, are returned with missing values on the left side replaced\nwith None. 
The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt4 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, 
right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n | 5\n | 6\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t2.b, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | | 5\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n4 | | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_right(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | -3\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 
| 16\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_right(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t2.b, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | | 10\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n4 | | 3\n```\n::\n::\nwindowby(time_expr, *, window, behavior=None, shard=None)\nCreate a GroupedTable by windowing the table (based on expr and window),\noptionally sharded with shard\n* Parameters\n * time_expr (`ColumnExpression`) \u2013 Column expression used for windowing\n * window (`Window`) \u2013 type window to use\n * shard (`Optional`\\[`ColumnExpression`\\]) \u2013 optional column expression to act as a shard key\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n'''\n | shard | t | v\n1 | 0 | 1 | 10\n2 | 0 | 2 | 1\n3 | 0 | 4 | 3\n4 | 0 | 8 | 2\n5 | 0 | 9 | 4\n6 | 0 | 10| 8\n7 | 1 | 1 | 9\n8 | 1 | 2 | 16\n''')\nresult = 
t.windowby(\n t.t, window=pw.temporal.session(predicate=lambda a, b: abs(a-b) <= 1), shard=t.shard\n).reduce(\npw.this.shard,\nmin_t=pw.reducers.min(pw.this.t),\nmax_v=pw.reducers.max(pw.this.v),\ncount=pw.reducers.count(),\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nshard | min_t | max_v | count\n0 | 1 | 10 | 2\n0 | 4 | 3 | 1\n0 | 8 | 8 | 3\n1 | 1 | 16 | 2\n```\n::\n::\nwith_columns(*args, kwargs)\nUpdates columns of self, according to args and kwargs.\nSee table.select specification for evaluation of args and kwargs.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | owner | pet | size\n1 | Tom | 1 | 10\n2 | Bob | 1 | 9\n3 | Tom | 2 | 8\n''').with_universe_of(t1)\nt3 = t1.with_columns(*t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet | size\n8 | Tom | 2 | 8\n9 | Bob | 1 | 9\n10 | Tom | 1 | 10\n```\n::\n::\nwith_id(new_index)\nSet new ids based on another column containing id-typed values.\nTo generate ids based on arbitrary valued columns, use with_id_from.\nValues assigned must be row-wise unique.\n* Parameters\n new_id \u2013 column to be used as the new index.\n* Returns\n Table with updated ids.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named 
columns over identical universes.\nExample:\n```python\nimport pytest; pytest.xfail(\"with_id is hard to test\")\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | new_id\n1 | 2\n2 | 3\n3 | 4\n''')\nt3 = t1.promise_universe_is_subset_of(t2).with_id(t2.new_id)\npw.debug.compute_and_print(t3)\n```\n::\nResult\n```\n age owner pet\n^2 10 Alice 1\n^3 9 Bob 1\n^4 8 Alice 2\n```\n::\n::\nwith_id_from(*args)\nCompute new ids based on values in columns.\nIds computed from columns must be row-wise unique.\n* Parameters\n columns \u2013 columns to be used as primary keys.\n* Returns\n *Table* \u2013 self updated with recomputed ids.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n 1 | 10 | Alice | 1\n 2 | 9 | Bob | 1\n 3 | 8 | Alice | 2\n''')\nt2 = t1 + t1.select(old_id=t1.id)\nt3 = t2.with_id_from(t2.age)\npw.debug.compute_and_print(t3) \n```\n::\nResult\n```\n | age | owner | pet | old_id\n^... | 8 | Alice | 2 | ^...\n^... | 9 | Bob | 1 | ^...\n^... 
| 10 | Alice | 1 | ^...\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt4 = t3.select(t3.age, t3.owner, t3.pet, same_as_old=(t3.id == t3.old_id),\n same_as_new=(t3.id == t3.pointer_from(t3.age)))\npw.debug.compute_and_print(t4) \n```\n::\nResult\n```\n | age | owner | pet | same_as_old | same_as_new\n^... | 8 | Alice | 2 | False | True\n^... | 9 | Bob | 1 | False | True\n^... | 10 | Alice | 1 | False | True\n```\n::\n::\nwith_prefix(prefix)\nRename columns by adding prefix to each name of column.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | 1\n9 | Bob | 1\n8 | Alice | 2\n''')\nt2 = t1.with_prefix(\"u_\")\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nu_age | u_owner | u_pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n```\n::\n::\nwith_suffix(suffix)\nRename columns by adding suffix to each name of column.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | 
pet\n10 | Alice | 1\n9 | Bob | 1\n8 | Alice | 2\n''')\nt2 = t1.with_suffix(\"_current\")\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nage_current | owner_current | pet_current\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n```\n::\n::\nwith_universe_of(other)\nReturns a copy of self with exactly the same universe as others.\nSemantics: Required precondition self.universe == other.universe\nUsed in situations where Pathway cannot deduce equality of universes, but\nthose are equal as verified during runtime.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | pet\n1 | Dog\n7 | Cat\n''')\nt2 = pw.debug.table_from_markdown('''\n | age\n1 | 10\n7 | 3\n''').with_universe_of(t1)\nt3 = t1 + t2\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\npet | age\nCat | 3\nDog | 10\n```\n::\n::\nwithout(*columns)\nSelects all columns without named column references.\n* Parameters\n columns (`str` | `ColumnReference`) \u2013 columns to be dropped provided by table.column_name notation.\n* Returns\n *Table* \u2013 self without specified columns.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = t1.without(t1.age, 
pw.this.pet)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner\nAlice\nAlice\nBob\n```\n::\n::\nclass pw.TableLike(context)\nInterface class for table-likes: Table, GroupedTable and JoinResult.\nAll of those contain universe info, and thus support universe-related asserts.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\ng1 = t1.groupby(t1.owner)\nt2 = t1.filter(t1.age >= 9)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n9 | Bob | dog\n10 | Alice | dog\n```\n::\n::\n```python\ng2 = t2.groupby(t2.owner)\npw.universes.promise_is_subset_of(g2, g1) # t2 is a subset of t1, so this is safe\n```\npromise_universe_is_equal_to(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the others.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n None\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nimport pytest\nt1 = pw.debug.table_from_markdown(\n '''\n | age | owner | pet\n1 | 8 | Alice | cat\n2 | 9 | Bob | dog\n3 | 15 | Alice | tortoise\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | age | 
owner\n1 | 11 | Alice\n2 | 12 | Tom\n3 | 7 | Eve\n'''\n)\nt3 = t2.filter(pw.this.age > 10)\nwith pytest.raises(\n ValueError,\n match='Universe of the argument of Table.update_cells\\(\\) needs ' # noqa\n + 'to be a subset of the universe of the updated table.',\n):\n t1.update_cells(t3)\nt1 = t1.promise_universe_is_equal_to(t2)\nresult = t1.update_cells(t3)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n11 | Alice | cat\n12 | Tom | dog\n15 | Alice | tortoise\n```\n::\n::\npromise_universe_is_subset_of(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the other.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 30\n''').promise_universe_is_subset_of(t1)\nt3 = t1 << t2\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 30\n```\n::\n::\npromise_universes_are_disjoint(other)\nAsserts to Pathway that an universe of self is disjoint from universe of other.\nSemantics: Used in situations where Pathway cannot deduce universes are disjoint.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is 
organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n11 | 11 | Alice | 30\n12 | 12 | Tom | 40\n''').promise_universes_are_disjoint(t1)\nt3 = t1.concat(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n11 | Alice | 30\n12 | Tom | 40\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.io.redpanda package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.redpanda package\nFunctions\npw.io.redpanda.read(rdkafka_settings, topic=None, *, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None, value_columns=None, primary_key=None, types=None, default_values=None, topic_names=None)\nReads table from a set of topics in Redpanda.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of\n librdkafka.\n * topic (`UnionType`\\[`str`, `list`\\[`str`\\], `None`\\]) \u2013 Name of topic in Redpanda from which the data should be read.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the field which require such mapping, it should be\n given in the format : , where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table, required for format other than\n \u201craw\u201d. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated as uuid4. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (pw.Type) of the values of those columns. This parameter is optional, and if not\n provided the default type is pw.Type.ANY. 
\\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named data.\nFor other formats, the argument value_column is required and defines the columns.\nExample:\nConsider a simple instance of Redpanda without authentication. Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"plaintext\",\n \"group.id\": \"$GROUP_NAME\",\n \"session.timeout.ms\": \"60000\"\n}\n```\nTo connect to the topic \u201canimals\u201d and accept messages, the connector must be used as follows, depending on the format:\nRaw version:\n```python\nimport pathway as pw\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"raw\",\n)\n```\nAll the data will be accessible in the column data.\nCSV version:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"csv\",\n schema=InputSchema,\n)\n```\nIn case of CSV format, the first message must be the header:\n```csv\nowner,pet\n```\nThen, simple data rows are expected. 
For example:\n```csv\nAlice,cat\nBob,dog\n```\nThis way, you get a table which looks as follows:\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice cat\n Bob dog\n```\n::\n::\nJSON version:\n```python\nimport pathway as pw\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"json\",\n schema=InputSchema,\n)\n```\nFor the JSON connector, you can send these two messages:\n```json\n{\"owner\": \"Alice\", \"pet\": \"cat\"}\n{\"owner\": \"Bob\", \"pet\": \"dog\"}\n```\nThis way, you get a table which looks as follows:\n"} -{"doc": "---\ntitle: pathway.io.redpanda package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.redpanda package\nFunctions\npw.io.redpanda.read(rdkafka_settings, topic=None, *, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None, value_columns=None, primary_key=None, types=None, default_values=None, topic_names=None)\nReads table from a set of topics in Redpanda.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of\n librdkafka.\n * topic (`UnionType`\\[`str`, `list`\\[`str`\\], `None`\\]) \u2013 Name of topic in Redpanda from which the data should be read.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the field which require such mapping, it should be\n given in the format : , where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table, required for format other than\n \u201craw\u201d. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated as uuid4. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (pw.Type) of the values of those columns. This parameter is optional, and if not\n provided the default type is pw.Type.ANY. 
\\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named data.\nFor other formats, the argument value_column is required and defines the columns.\nExample:\nConsider a simple instance of Redpanda without authentication. Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"plaintext\",\n \"group.id\": \"$GROUP_NAME\",\n \"session.timeout.ms\": \"60000\"\n}\n```\nTo connect to the topic \u201canimals\u201d and accept messages, the connector must be used as follows, depending on the format:\nRaw version:\n```python\nimport pathway as pw\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"raw\",\n)\n```\nAll the data will be accessible in the column data.\nCSV version:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"csv\",\n schema=InputSchema,\n)\n```\nIn case of CSV format, the first message must be the header:\n```csv\nowner,pet\n```\nThen, simple data rows are expected. For example:\n```csv\nAlice,cat\nBob,dog\n```\nThis way, you get a table which looks as follows:\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice cat\n Bob dog\n```\n::\n::\nNow consider that the data about pets come in a more sophisticated way. 
For instance\nyou have an owner, kind and name of an animal, along with some physical measurements.\nThe JSON payload in this case may look as follows:\n```json\n{\n \"name\": \"Jack\",\n \"pet\": {\n \"animal\": \"cat\",\n \"name\": \"Bob\",\n \"measurements\": [100, 200, 300]\n }\n}\n```\nSuppose you need to extract a name of the pet and the height, which is the 2nd\n(1-based) or the 1st (0-based) element in the array of measurements. Then, you\nuse JSON Pointer and do a connector, which gets the data as follows:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n pet_name: str\n pet_height: int\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"json\",\n schema=InputSchema,\n json_field_paths={\n \"pet_name\": \"/pet/name\",\n \"pet_height\": \"/pet/measurements/1\"\n },\n)\n```\npw.io.redpanda.write(table, rdkafka_settings, topic_name, *, format='json', kwargs)\nWrite a table to a given topic on a Redpanda instance.\n* Parameters\n * table (`Table`) \u2013 the table to output.\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of librdkafka.\n * topic_name (`str`) \u2013 name of topic in Redpanda to which the data should be sent.\n * format (`str`) \u2013 format of the input data, only \u201cjson\u201d is currently supported.\n* Returns\n None\nLimitations:\nFor future proofing, the format is configurable, but (for now) only JSON is available.\nExample:\nConsider there is a queue in Redpanda, running locally on port 9092. Our queue can\nuse SASL-SSL authentication over a SCRAM-SHA-256 mechanism. You can set up a queue\nwith similar parameters in Upstash. 
Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"sasl_ssl\",\n \"sasl.mechanism\": \"SCRAM-SHA-256\",\n \"sasl.username\": os.environ[\"KAFKA_USERNAME\"],\n \"sasl.password\": os.environ[\"KAFKA_PASSWORD\"]\n}\n```\nYou want to send a Pathway table t to the Redpanda instance.\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice dog \\n 2 9 Bob cat \\n 3 8 Alice cat\")\n```\nTo connect to the topic \u201canimals\u201d and send messages, the connector must be used as follows, depending on the format:\nJSON version:\n```python\npw.io.redpanda.write(\n t,\n rdkafka_settings,\n \"animals\",\n format=\"json\",\n)\n```\nAll the updates of table t will be sent to the Redpanda instance.\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport datetime\nt1 = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-03-26T01:23:00\n 2 | 2023-03-27T01:23:00\n 3 | 2023-10-29T01:23:00\n 4 | 
2023-10-30T01:23:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(date=pw.this.date.dt.strptime(fmt=fmt))\nt3 = t2.with_columns(\n new_date=pw.this.date.dt.add_duration_in_timezone(\n datetime.timedelta(hours=2), timezone=\"Europe/Warsaw\"\n ),\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\ndate | new_date\n2023-03-26 01:23:00 | 2023-03-26 04:23:00\n2023-03-27 01:23:00 | 2023-03-27 03:23:00\n2023-10-29 01:23:00 | 2023-10-29 02:23:00\n2023-10-30 01:23:00 | 2023-10-30 03:23:00\n```\n::\n::\nday()\nExtracts day from a DateTime.\n* Returns\n Day as int. 1 <= day <= 31 (depending on a month)\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1974-03-12T00:00:00\n 2 | 2023-03-25T12:00:00\n 3 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\npw.debug.compute_and_print(table_with_days, 
include_id=False)\n```\n::\nResult\n```\nday\n12\n15\n25\n```\n::\n::\ndays()\nThe total number of days in a Duration.\n* Returns\n Days as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-03-15T00:00:00 | 2023-05-15T10:13:23\n 1 | 2023-04-15T00:00:00 | 2023-05-15T10:00:00\n 2 | 2023-05-01T10:00:00 | 2023-05-15T10:00:00\n 3 | 2023-05-15T10:00:00 | 2023-05-15T09:00:00\n 4 | 2023-05-15T10:00:00 | 2023-05-15T11:00:00\n 5 | 2023-05-16T12:13:00 | 2023-05-15T10:00:00\n 6 | 2024-05-15T14:13:23 | 2023-05-15T10:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_days = table_with_diff.select(days=pw.this[\"diff\"].dt.days())\npw.debug.compute_and_print(table_with_days, 
include_id=False)\n```\n::\nResult\n```\ndays\n-61\n-30\n-14\n0\n0\n1\n366\n```\n::\n::\nfloor(duration)\nTruncates DateTime to precision specified by duration argument.\n* Parameters\n duration (`ColumnExpression` | `Timedelta` | `str`) \u2013 truncation precision\nNOTE: Duration can be given as a string, in such case we accept aliases used\nby Pandas\nthat represent a fixed duration, so e.g. \u201cM\u201d will not be accepted.\nFor ambiguous frequencies, you can use other methods, e.g. `column.dt.month()`\ninstead of `column.dt.floor(\"1M\")`.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExamples:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport datetime\nt1 = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-05-15T12:23:12\n 2 | 2023-05-15T12:33:21\n 3 | 2023-05-15T13:20:35\n 4 | 2023-05-15T13:51:41\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(date=pw.this.date.dt.strptime(fmt=fmt))\nres = t2.with_columns(\n 
truncated_to_hours=pw.this.date.dt.floor(datetime.timedelta(hours=1)),\n truncated_to_10_min=pw.this.date.dt.floor(datetime.timedelta(minutes=10)),\n truncated_to_15_s=pw.this.date.dt.floor(datetime.timedelta(seconds=15)),\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\ndate | truncated_to_hours | truncated_to_10_min | truncated_to_15_s\n2023-05-15 12:23:12 | 2023-05-15 12:00:00 | 2023-05-15 12:20:00 | 2023-05-15 12:23:00\n2023-05-15 12:33:21 | 2023-05-15 12:00:00 | 2023-05-15 12:30:00 | 2023-05-15 12:33:15\n2023-05-15 13:20:35 | 2023-05-15 13:00:00 | 2023-05-15 13:20:00 | 2023-05-15 13:20:30\n2023-05-15 13:51:41 | 2023-05-15 13:00:00 | 2023-05-15 13:50:00 | 2023-05-15 13:51:30\n```\n::\n::\nfrom_timestamp(unit)\nConverts timestamp represented as an int to DateTime.\n* Parameters\n * timestamp \u2013 value to be converted to DateTime\n * unit (`str`) \u2013 unit of a timestamp. It has to be one of \u2018s\u2019, \u2018ms\u2019, \u2018us\u2019, \u2018ns\u2019\n* Returns\n DateTime\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called 
on\nExample:\n```python\nimport pathway as pw\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt1 = pw.debug.table_from_markdown(\n '''\n | timestamp\n1 | 10\n2 | 1685969950\n'''\n)\nt2 = t1.select(date=pw.this.timestamp.dt.from_timestamp(unit=\"s\"))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ndate\n1970-01-01 00:00:10\n2023-06-05 12:59:10\n```\n::\n::\nhour()\nExtracts hour from a DateTime.\n* Returns\n Hour as int. 0 <= hour < 24\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T00:00:00\n 2 | 2023-05-15T12:00:00\n 3 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_hours = table_with_datetime.select(hour=table_with_datetime.t1.dt.hour())\npw.debug.compute_and_print(table_with_hours, include_id=False)\n```\n::\nResult\n```\nhour\n0\n12\n14\n```\n::\n::\nhours()\nThe total number of hours in a Duration.\n* Returns\n Hours as int.\nExample:\n"} -{"doc": "---\ntitle: 
Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T00:00:00 | 2023-05-15T10:13:23\n 1 | 2023-05-15T00:00:00 | 2023-05-15T10:00:00\n 2 | 2023-05-15T10:00:00 | 2023-05-15T10:00:00\n 3 | 2023-05-15T10:00:23 | 2023-05-15T10:00:00\n 4 | 2023-05-15T12:13:00 | 2023-05-15T10:00:00\n 5 | 2023-05-15T14:13:23 | 2023-05-15T10:00:00\n 6 | 2023-05-16T10:13:23 | 2023-05-15T10:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_hours = table_with_diff.select(hours=pw.this[\"diff\"].dt.hours())\npw.debug.compute_and_print(table_with_hours, include_id=False)\n```\n::\nResult\n```\nhours\n-10\n-10\n0\n0\n2\n4\n24\n```\n::\n::\nmicrosecond()\nExtracts microseconds from a DateTime.\n* Returns\n Microsecond as int. 
0 <= microsecond < 1_000_000\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.000012000\n 3 | 2023-05-15T10:13:00.123456789\n 4 | 2023-05-15T10:13:23.123456789\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f\"))\ntable_with_microseconds = table_with_datetime.select(\n microsecond=table_with_datetime.t1.dt.microsecond()\n)\npw.debug.compute_and_print(table_with_microseconds, include_id=False)\n```\n::\nResult\n```\nmicrosecond\n0\n12\n123456\n123456\n```\n::\n::\nmicroseconds()\nThe total number of microseconds in a Duration.\n* Returns\n Microseconds as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = 
pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:23.123456789\n 1 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.000012000 | 2023-05-15T10:13:00.000000000\n 3 | 2023-05-15T10:13:00.123456789 | 2023-05-15T10:13:00.000000000\n 4 | 2023-05-15T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n 5 | 2023-05-16T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S.%f\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_microseconds = table_with_diff.select(\n microseconds=pw.this[\"diff\"].dt.microseconds()\n)\npw.debug.compute_and_print(table_with_microseconds, include_id=False)\n```\n::\nResult\n```\nmicroseconds\n-23123456\n0\n12\n123456\n23123456\n86423123456\n```\n::\n::\nmillisecond()\nExtracts milliseconds from a DateTime.\n* Returns\n Millisecond as int. 
0 <= millisecond < 1_000\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.012000000\n 3 | 2023-05-15T10:13:00.123456789\n 4 | 2023-05-15T10:13:23.123456789\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f\"))\ntable_with_milliseconds = table_with_datetime.select(\n millisecond=table_with_datetime.t1.dt.millisecond()\n)\npw.debug.compute_and_print(table_with_milliseconds, include_id=False)\n```\n::\nResult\n```\nmillisecond\n0\n12\n123\n123\n```\n::\n::\nmilliseconds()\nThe total number of milliseconds in a Duration.\n* Returns\n Milliseconds as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = 
pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:23.123456789\n 1 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.012000000 | 2023-05-15T10:13:00.000000000\n 3 | 2023-05-15T10:13:00.123456789 | 2023-05-15T10:13:00.000000000\n 4 | 2023-05-15T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n 5 | 2023-05-16T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S.%f\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_milliseconds = table_with_diff.select(\n milliseconds=pw.this[\"diff\"].dt.milliseconds()\n)\npw.debug.compute_and_print(table_with_milliseconds, include_id=False)\n```\n::\nResult\n```\nmilliseconds\n-23123\n0\n12\n123\n23123\n86423123\n```\n::\n::\nminute()\nExtracts minute from a DateTime.\n* Returns\n Minute as int. 
0 <= minute < 60\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T10:00:00\n 2 | 2023-05-15T10:00:23\n 3 | 2023-05-15T10:13:00\n 4 | 2023-05-15T10:13:23\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_minutes = table_with_datetime.select(\n minute=table_with_datetime.t1.dt.minute()\n)\npw.debug.compute_and_print(table_with_minutes, include_id=False)\n```\n::\nResult\n```\nminute\n0\n0\n13\n13\n```\n::\n::\nminutes()\nThe total number of minutes in a Duration.\n* Returns\n Minutes as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = 
table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T10:00:00 | 2023-05-15T10:13:23\n 1 | 2023-05-15T10:00:00 | 2023-05-15T10:00:00\n 2 | 2023-05-15T10:00:23 | 2023-05-15T10:00:00\n 3 | 2023-05-15T10:13:00 | 2023-05-15T10:00:00\n 4 | 2023-05-15T10:13:23 | 2023-05-15T10:00:00\n 5 | 2023-05-16T10:13:23 | 2023-05-15T10:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_minutes = table_with_diff.select(minutes=pw.this[\"diff\"].dt.minutes())\npw.debug.compute_and_print(table_with_minutes, include_id=False)\n```\n::\nResult\n```\nminutes\n-13\n0\n0\n13\n13\n1453\n```\n::\n::\nmonth()\nExtracts month from a DateTime.\n* Returns\n Month as int. 
1 <= month <= 12\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1974-03-12T00:00:00\n 2 | 2023-03-25T12:00:00\n 3 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_months = table_with_datetime.select(month=table_with_datetime.t1.dt.month())\npw.debug.compute_and_print(table_with_months, include_id=False)\n```\n::\nResult\n```\nmonth\n3\n3\n5\n```\n::\n::\nnanosecond()\nExtracts nanoseconds from a DateTime.\n* Returns\n Nanosecond as int. 
0 <= nanosecond < 1_000_000_000\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.000000012\n 3 | 2023-05-15T10:13:00.123456789\n 4 | 2023-05-15T10:13:23.123456789\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f\"))\ntable_with_nanoseconds = table_with_datetime.select(\n nanosecond=table_with_datetime.t1.dt.nanosecond()\n)\npw.debug.compute_and_print(table_with_nanoseconds, include_id=False)\n```\n::\nResult\n```\nnanosecond\n0\n12\n123456789\n123456789\n```\n::\n::\nnanoseconds()\nThe total number of nanoseconds in a Duration.\n* Returns\n Nanoseconds as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = 
pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:23.123456789\n 1 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.000000012 | 2023-05-15T10:13:00.000000000\n 3 | 2023-05-15T10:13:00.123456789 | 2023-05-15T10:13:00.000000000\n 4 | 2023-05-15T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n 5 | 2023-05-16T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S.%f\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_nanoseconds = table_with_diff.select(\n nanoseconds=pw.this[\"diff\"].dt.nanoseconds()\n)\npw.debug.compute_and_print(table_with_nanoseconds, include_id=False)\n```\n::\nResult\n```\nnanoseconds\n-23123456789\n0\n12\n123456789\n23123456789\n86423123456789\n```\n::\n::\nround(duration)\nRounds DateTime to precision specified by duration argument.\n* Parameters\n duration (`ColumnExpression` | `Timedelta` | `str`) \u2013 rounding precision\nNOTE: Duration can be given as a string, in such case we accept aliases used\nby Pandas\nthat represent a fixed duration, so e.g. 
\u201cM\u201d will not be accepted.\nFor ambiguous frequencies, you can use other methods, e.g. `column.dt.month()`\ninstead of `column.dt.floor(\"1M\")`.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExamples:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport datetime\nt1 = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-05-15T12:23:12\n 2 | 2023-05-15T12:33:21\n 3 | 2023-05-15T13:20:35\n 4 | 2023-05-15T13:51:41\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(date=pw.this.date.dt.strptime(fmt=fmt))\nres = t2.with_columns(\n rounded_to_hours=pw.this.date.dt.round(datetime.timedelta(hours=1)),\n rounded_to_10_min=pw.this.date.dt.round(datetime.timedelta(minutes=10)),\n rounded_to_15_s=pw.this.date.dt.round(datetime.timedelta(seconds=15)),\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\ndate | rounded_to_hours | rounded_to_10_min | rounded_to_15_s\n2023-05-15 12:23:12 | 2023-05-15 12:00:00 | 2023-05-15 12:20:00 | 2023-05-15 
12:23:15\n2023-05-15 12:33:21 | 2023-05-15 13:00:00 | 2023-05-15 12:30:00 | 2023-05-15 12:33:15\n2023-05-15 13:20:35 | 2023-05-15 13:00:00 | 2023-05-15 13:20:00 | 2023-05-15 13:20:30\n2023-05-15 13:51:41 | 2023-05-15 14:00:00 | 2023-05-15 13:50:00 | 2023-05-15 13:51:45\n```\n::\n::\nsecond()\nExtracts seconds from a DateTime.\n* Returns\n Second as int. 0 <= second < 60\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.123456789\n 3 | 2023-05-15T10:13:23.000000000\n 4 | 2023-05-15T10:13:23.123456789\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f\"))\ntable_with_seconds = table_with_datetime.select(\n second=table_with_datetime.t1.dt.second()\n)\npw.debug.compute_and_print(table_with_seconds, include_id=False)\n```\n::\nResult\n```\nsecond\n0\n0\n23\n23\n```\n::\n::\nseconds()\nThe total number of seconds in a Duration.\n* Returns\n Seconds as int.\nExample:\n"} -{"doc": 
"---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:23.123456789\n 1 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.123456789 | 2023-05-15T10:13:00.000000000\n 3 | 2023-05-15T10:13:23.000000000 | 2023-05-15T10:13:00.000000000\n 4 | 2023-05-15T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n 5 | 2023-05-16T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S.%f\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_seconds = table_with_diff.select(seconds=pw.this[\"diff\"].dt.seconds())\npw.debug.compute_and_print(table_with_seconds, include_id=False)\n```\n::\nResult\n```\nseconds\n-23\n0\n0\n23\n23\n86423\n```\n::\n::\nstrftime(fmt)\nConverts a DateTime to a string.\n* Parameters\n fmt (`ColumnExpression` | `str`) 
\u2013 Format string. We use the specifiers of chrono library. In most cases they are identical to standard python specifiers in strftime .\n* Returns\n str\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1970-02-03T10:13:00\n 2 | 2023-03-25T10:13:00\n 3 | 2023-03-26T12:13:00\n 4 | 2023-05-15T14:13:23\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetime = table.select(t1=pw.this.t1.dt.strptime(fmt=fmt))\ntable_formatted = table_with_datetime.select(\n date=pw.this.t1.dt.strftime(\"%d.%m.%Y\"),\n full_date=pw.this.t1.dt.strftime(\"%B %d, %Y\"),\n time_24=pw.this.t1.dt.strftime(\"%H:%M:%S\"),\n time_12=pw.this.t1.dt.strftime(\"%I:%M:%S %p\"),\n)\npw.debug.compute_and_print(table_formatted, include_id=False)\n```\n::\nResult\n```\ndate | full_date | time_24 | time_12\n03.02.1970 | February 03, 1970 | 10:13:00 | 10:13:00 AM\n15.05.2023 | May 15, 2023 | 14:13:23 | 02:13:23 PM\n25.03.2023 | March 25, 2023 | 10:13:00 | 10:13:00 AM\n26.03.2023 | March 26, 2023 | 12:13:00 | 
12:13:00 PM\n```\n::\n::\nstrptime(fmt, contains_timezone=None)\nConverts a string to a DateTime. If the string contains a timezone and\na %z specifier is used, timezone-aware DateTime is created.\nThen the timezone is converted to a server timezone (see examples).\nIf the string contains no timezone, a naive (not aware of timezone) DateTime\nis created.\n* Parameters\n fmt (`ColumnExpression` | `str`) \u2013 Format string. We use the specifiers of chrono library. In most cases they are identical to standard python specifiers in strptime . contains_timezone: If fmt is not a single string (the same for all objects) but a ColumnExpression, you need to set this parameter so that the function can determine if the return type is DateTimeNaive (contains_timezone = False) or DateTimeUtc (contains_timezone = True).\n* Returns\n DateTime\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1970-02-03T10:13:00.000000000\n 2 | 2023-03-25T10:13:00.000000012\n 3 | 
2023-03-26T12:13:00.123456789\n 4 | 2023-05-15T14:13:23.123456789\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S.%f\"\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(fmt=fmt))\npw.debug.compute_and_print(table_with_datetime, include_id=False)\n```\n::\nResult\n```\nt1\n1970-02-03 10:13:00\n2023-03-25 10:13:00.000000012\n2023-03-26 12:13:00.123456789\n2023-05-15 14:13:23.123456789\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 03.02.1970T10:13:00.000000000\n 2 | 25.03.2023T10:13:00.000000012\n 3 | 26.03.2023T12:13:00.123456789\n 4 | 15.05.2023T14:13:23.123456789\n'''\n)\nfmt = \"%d.%m.%YT%H:%M:%S.%f\"\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(fmt=fmt))\npw.debug.compute_and_print(table_with_datetime, include_id=False)\n```\n::\nResult\n```\nt1\n1970-02-03 10:13:00\n2023-03-25 10:13:00.000000012\n2023-03-26 12:13:00.123456789\n2023-05-15 14:13:23.123456789\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions 
API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1970-02-03T10:13:00-02:00\n 2 | 2023-03-25T10:13:00+00:00\n 3 | 2023-03-26T12:13:00-01:00\n 4 | 2023-05-15T14:13:23+00:30\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S%z\"\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(fmt=fmt))\npw.debug.compute_and_print(table_with_datetime, include_id=False)\n```\n::\nResult\n```\nt1\n1970-02-03 12:13:00+00:00\n2023-03-25 10:13:00+00:00\n2023-03-26 13:13:00+00:00\n2023-05-15 13:43:23+00:00\n```\n::\n::\nsubtract_date_time_in_timezone(date_time, timezone)\nSubtracts two DateTimeNaives taking into account time zone.\n* Parameters\n * date_time (`ColumnExpression` | `Timestamp`) \u2013 DateTimeNaive to be subtracted from self.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform subtraction in.\n* Returns\n Duration\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical 
use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | date1 | date2\n 1 | 2023-03-26T03:20:00 | 2023-03-26T01:20:00\n 2 | 2023-03-27T03:20:00 | 2023-03-27T01:20:00\n 3 | 2023-10-29T03:20:00 | 2023-10-29T01:20:00\n 4 | 2023-10-30T03:20:00 | 2023-10-30T01:20:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(\n date1=pw.this.date1.dt.strptime(fmt=fmt), date2=pw.this.date2.dt.strptime(fmt=fmt)\n)\nt3 = t2.with_columns(\n diff=pw.this.date1.dt.subtract_date_time_in_timezone(\n pw.this.date2, timezone=\"Europe/Warsaw\"\n ),\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\ndate1 | date2 | diff\n2023-03-26 03:20:00 | 2023-03-26 01:20:00 | 0 days 01:00:00\n2023-03-27 03:20:00 | 2023-03-27 01:20:00 | 0 days 02:00:00\n2023-10-29 03:20:00 | 2023-10-29 01:20:00 | 0 days 03:00:00\n2023-10-30 03:20:00 | 2023-10-30 01:20:00 | 0 days 02:00:00\n```\n::\n::\nsubtract_duration_in_timezone(duration, timezone)\nSubtracts Duration from DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be subtracted from DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform subtraction in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an 
object the method was called on\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport datetime\nt1 = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-03-26T03:23:00\n 2 | 2023-03-27T03:23:00\n 3 | 2023-10-29T03:23:00\n 4 | 2023-10-30T03:23:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(date=pw.this.date.dt.strptime(fmt=fmt))\nt3 = t2.with_columns(\n new_date=pw.this.date.dt.subtract_duration_in_timezone(\n datetime.timedelta(hours=2), timezone=\"Europe/Warsaw\"\n ),\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\ndate | new_date\n2023-03-26 03:23:00 | 2023-03-26 00:23:00\n2023-03-27 03:23:00 | 2023-03-27 01:23:00\n2023-10-29 03:23:00 | 2023-10-29 02:23:00\n2023-10-30 03:23:00 | 2023-10-30 01:23:00\n```\n::\n::\ntimestamp()\nReturns a number of nanoseconds from 1970-01-01 for naive DateTime\nand from 1970-01-01 UTC for timezone-aware datetime.\n* Returns\n Timestamp as int.\nExamples:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions 
API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 0 | 1969-01-01T00:00:00.000000000\n 1 | 1970-01-01T00:00:00.000000000\n 2 | 2023-01-01T00:00:00.000000000\n 3 | 2023-03-25T00:00:00.000000000\n 4 | 2023-03-25T13:45:26.000000000\n 5 | 2023-03-25T13:45:26.987654321\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f\"))\ntable_with_timestamp = table_with_datetime.select(\n timestamp=table_with_datetime.t1.dt.timestamp()\n)\npw.debug.compute_and_print(table_with_timestamp, include_id=False)\n```\n::\nResult\n```\ntimestamp\n-31536000000000000\n0\n1672531200000000000\n1679702400000000000\n1679751926000000000\n1679751926987654321\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = 
table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1969-01-01T00:00:00.000000000+00:00\n 2 | 1970-01-01T00:00:00.000000000+00:00\n 3 | 1970-01-01T00:00:00.000000000+02:00\n 4 | 1970-01-01T00:00:00.000000000-03:00\n 5 | 2023-01-01T00:00:00.000000000+01:00\n 6 | 2023-03-25T00:00:00.000000000+01:00\n 7 | 2023-03-25T13:45:26.000000000+01:00\n 8 | 2023-03-25T13:45:26.987654321+01:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f%z\"))\ntable_with_timestamp = table_with_datetime.select(\n timestamp=table_with_datetime.t1.dt.timestamp()\n)\npw.debug.compute_and_print(table_with_timestamp, include_id=False)\n```\n::\nResult\n```\ntimestamp\n-31536000000000000\n-7200000000000\n0\n10800000000000\n1672527600000000000\n1679698800000000000\n1679748326000000000\n1679748326987654321\n```\n::\n::\nto_naive_in_timezone(timezone)\nConverts DateTimeUtc to time zone specified as timezone argument.\n* Parameters\n timezone (`ColumnExpression` | `str`) \u2013 The time zone to convert to.\n* Returns\n DateTimeNaive\nExamples:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 
2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | date_utc\n 1 | 2023-03-26T00:59:00+00:00\n 2 | 2023-03-26T01:00:00+00:00\n 3 | 2023-03-27T00:59:00+00:00\n 4 | 2023-03-27T01:00:00+00:00\n 5 | 2023-10-28T23:59:00+00:00\n 6 | 2023-10-29T00:00:00+00:00\n 7 | 2023-10-29T00:30:00+00:00\n 8 | 2023-10-29T01:00:00+00:00\n 9 | 2023-10-29T01:30:00+00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S%z\"\ntable_utc = table.select(date_utc=pw.this.date_utc.dt.strptime(fmt=fmt))\ntable_local = table_utc.with_columns(\n date=pw.this.date_utc.dt.to_naive_in_timezone(timezone=\"Europe/Warsaw\"),\n)\npw.debug.compute_and_print(table_local, include_id=False)\n```\n::\nResult\n```\ndate_utc | date\n2023-03-26 00:59:00+00:00 | 2023-03-26 01:59:00\n2023-03-26 01:00:00+00:00 | 2023-03-26 03:00:00\n2023-03-27 00:59:00+00:00 | 2023-03-27 02:59:00\n2023-03-27 01:00:00+00:00 | 2023-03-27 03:00:00\n2023-10-28 23:59:00+00:00 | 2023-10-29 01:59:00\n2023-10-29 00:00:00+00:00 | 2023-10-29 02:00:00\n2023-10-29 00:30:00+00:00 | 2023-10-29 02:30:00\n2023-10-29 01:00:00+00:00 | 2023-10-29 02:00:00\n2023-10-29 01:30:00+00:00 | 2023-10-29 02:30:00\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt 
attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | date_utc\n 1 | 2023-03-12T09:59:00+00:00\n 2 | 2023-03-12T10:00:00+00:00\n 3 | 2023-03-13T09:59:00+00:00\n 4 | 2023-03-13T10:00:00+00:00\n 5 | 2023-11-05T07:59:00+00:00\n 6 | 2023-11-05T08:00:00+00:00\n 7 | 2023-11-05T08:30:00+00:00\n 8 | 2023-11-05T09:00:00+00:00\n 9 | 2023-11-05T09:30:00+00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S%z\"\ntable_utc = table.select(date_utc=pw.this.date_utc.dt.strptime(fmt=fmt))\ntable_local = table_utc.with_columns(\n date=pw.this.date_utc.dt.to_naive_in_timezone(timezone=\"America/Los_Angeles\"),\n)\npw.debug.compute_and_print(table_local, include_id=False)\n```\n::\nResult\n```\ndate_utc | date\n2023-03-12 09:59:00+00:00 | 2023-03-12 01:59:00\n2023-03-12 10:00:00+00:00 | 2023-03-12 03:00:00\n2023-03-13 09:59:00+00:00 | 2023-03-13 02:59:00\n2023-03-13 10:00:00+00:00 | 2023-03-13 03:00:00\n2023-11-05 07:59:00+00:00 | 2023-11-05 00:59:00\n2023-11-05 08:00:00+00:00 | 2023-11-05 01:00:00\n2023-11-05 08:30:00+00:00 | 2023-11-05 01:30:00\n2023-11-05 09:00:00+00:00 | 2023-11-05 01:00:00\n2023-11-05 09:30:00+00:00 | 2023-11-05 01:30:00\n```\n::\n::\nto_utc(from_timezone)\nConverts DateTimeNaive to UTC from time zone provided as from_timezone\nargument. 
If the given DateTime doesn\u2019t exist in the provided time zone it is\nmapped to the first existing DateTime after it. If a given DateTime corresponds\nto more than one moments in the provided time zone, it is mapped to a later\nmoment.\n* Parameters\n from_timezone (`ColumnExpression` | `str`) \u2013 The time zone to convert from.\n* Returns\n DateTimeUtc\nExamples:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-03-26T01:59:00\n 2 | 2023-03-26T02:30:00\n 3 | 2023-03-26T03:00:00\n 4 | 2023-03-27T01:59:00\n 5 | 2023-03-27T02:30:00\n 6 | 2023-03-27T03:00:00\n 7 | 2023-10-29T01:59:00\n 8 | 2023-10-29T02:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_local = table.select(date=pw.this.date.dt.strptime(fmt=fmt))\ntable_utc = table_local.with_columns(\n date_utc=pw.this.date.dt.to_utc(from_timezone=\"Europe/Warsaw\"),\n)\npw.debug.compute_and_print(table_utc, include_id=False)\n```\n::\nResult\n```\ndate | date_utc\n2023-03-26 01:59:00 | 2023-03-26 
00:59:00+00:00\n2023-03-26 02:30:00 | 2023-03-26 01:00:00+00:00\n2023-03-26 03:00:00 | 2023-03-26 01:00:00+00:00\n2023-03-27 01:59:00 | 2023-03-26 23:59:00+00:00\n2023-03-27 02:30:00 | 2023-03-27 00:30:00+00:00\n2023-03-27 03:00:00 | 2023-03-27 01:00:00+00:00\n2023-10-29 01:59:00 | 2023-10-28 23:59:00+00:00\n2023-10-29 02:00:00 | 2023-10-29 01:00:00+00:00\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-03-12T01:59:00\n 2 | 2023-03-12T02:30:00\n 3 | 2023-03-12T03:00:00\n 4 | 2023-03-13T01:59:00\n 5 | 2023-03-13T02:30:00\n 6 | 2023-03-13T03:00:00\n 7 | 2023-11-05T00:59:00\n 8 | 2023-11-05T01:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_local = table.select(date=pw.this.date.dt.strptime(fmt=fmt))\ntable_utc = table_local.with_columns(\n date_utc=pw.this.date.dt.to_utc(from_timezone=\"America/Los_Angeles\"),\n)\npw.debug.compute_and_print(table_utc, include_id=False)\n```\n::\nResult\n```\ndate | date_utc\n2023-03-12 01:59:00 | 2023-03-12 09:59:00+00:00\n2023-03-12 02:30:00 | 
2023-03-12 10:00:00+00:00\n2023-03-12 03:00:00 | 2023-03-12 10:00:00+00:00\n2023-03-13 01:59:00 | 2023-03-13 08:59:00+00:00\n2023-03-13 02:30:00 | 2023-03-13 09:30:00+00:00\n2023-03-13 03:00:00 | 2023-03-13 10:00:00+00:00\n2023-11-05 00:59:00 | 2023-11-05 07:59:00+00:00\n2023-11-05 01:00:00 | 2023-11-05 09:00:00+00:00\n```\n::\n::\nweekday()\nConverts a DateTime to an int representing its day of the week, where 0 denotes\na Monday, and 6 denotes a Sunday.\n* Returns\n int\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1970-02-03T10:13:00\n 2 | 2023-03-25T10:13:00\n 3 | 2023-03-26T12:13:00\n 4 | 2023-05-15T14:13:23\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetime = table.select(t1=pw.this.t1.dt.strptime(fmt=fmt))\ntable_with_dayofweek = table_with_datetime.with_columns(weekday=pw.this.t1.dt.weekday())\npw.debug.compute_and_print(table_with_dayofweek, include_id=False)\n```\n::\nResult\n```\nt1 | weekday\n1970-02-03 10:13:00 | 1\n2023-03-25 10:13:00 | 
5\n2023-03-26 12:13:00 | 6\n2023-05-15 14:13:23 | 0\n```\n::\n::\nweeks()\nThe total number of weeks in a Duration.\n* Returns\n Weeks as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-03-15T00:00:00 | 2023-05-15T10:13:23\n 1 | 2023-04-15T00:00:00 | 2023-05-15T10:00:00\n 2 | 2023-05-01T10:00:00 | 2023-05-15T10:00:00\n 3 | 2023-05-15T10:00:00 | 2023-05-15T09:00:00\n 4 | 2023-05-15T10:00:00 | 2023-05-15T11:00:00\n 5 | 2023-05-16T12:13:00 | 2023-05-15T10:00:00\n 6 | 2024-05-15T14:13:23 | 2023-05-15T10:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_weeks = table_with_diff.select(weeks=pw.this[\"diff\"].dt.weeks())\npw.debug.compute_and_print(table_with_weeks, include_id=False)\n```\n::\nResult\n```\nweeks\n-8\n-4\n-2\n0\n0\n0\n52\n```\n::\n::\nyear()\nExtracts 
year from a DateTime.\n* Returns\n Year as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1974-03-12T00:00:00\n 2 | 2023-03-25T12:00:00\n 3 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_years = table_with_datetime.select(year=table_with_datetime.t1.dt.year())\npw.debug.compute_and_print(table_with_years, include_id=False)\n```\n::\nResult\n```\nyear\n1974\n2023\n2023\n```\n::\n::\nclass pw.NumericalNamespace(expression)\nA module containing methods related to numbers.\nThey can be called using a num attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | v\n 1 | -1\n'''\n)\ntable_abs = table.select(v_abs=table.v.num.abs())\n```\nabs()\nReturns the absolute value from a numerical value.\n* Returns\n Absolute value as float\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# 
Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | v\n 1 | 1\n 2 | -1\n 3 | 2.5\n 4 | -2.5\n'''\n)\ntable_abs = table.select(v_abs=table.v.num.abs())\npw.debug.compute_and_print(table_abs, include_id=False)\n```\n::\nResult\n```\nv_abs\n1.0\n1.0\n2.5\n2.5\n```\n::\n::\nfill_na(default_value)\nFill the missing values (None or NaN) in a column of a table with a specified default value.\n* Parameters\n default_value (*float*) \u2013 The value to fill in for the missing values.\n* Returns\n A new column with the missing values filled with the specified default value.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = 
table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | v\n 1 | 1\n 2 | 2.0\n 3 | None\n 4 | 3.5\n'''\n)\ntable_fill_na = table.select(v_filled=table.v.num.fill_na(0))\npw.debug.compute_and_print(table_fill_na, include_id=False)\n```\n::\nResult\n```\nv_filled\n0.0\n1.0\n2.0\n3.5\n```\n::\n::\nround(decimals=0)\nRound the values in a column of a table to the specified number of decimals.\n* Parameters\n * decimals (`ColumnExpression` | `int`) \u2013 The number of decimal places to round to. It can be either an\n * 0. (*integer or a reference to another column. 
Defaults to*) \u2013 \n* Returns\n A new column with the values rounded to the specified number of decimals.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | v\n 1 | -2.18\n 2 | -1.11\n 3 | 1\n 4 | 2.1\n 5 | 3.14\n 6 | 4.17\n'''\n)\ntable_round = table.select(v_round=table.v.num.round(1))\npw.debug.compute_and_print(table_round, include_id=False)\n```\n::\nResult\n```\nv_round\n-2.2\n-1.1\n1.0\n2.1\n3.1\n4.2\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = 
table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | v | precision\n 1 | 3 | 0\n 2 | 3.1 | 1\n 3 | 3.14 | 1\n 4 | 3.141 | 2\n 5 | 3.1415 | 2\n'''\n)\ntable_round = table.select(v_round=table.v.num.round(pw.this.precision))\npw.debug.compute_and_print(table_round, include_id=False)\n```\n::\nResult\n```\nv_round\n3.0\n3.1\n3.1\n3.14\n3.14\n```\n::\n::\nclass pw.StringNamespace(expression)\nA module containing methods related to string.\nThey can be called using a str attribute of an expression.\nTypical use:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport 
pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | ALICE\n'''\n)\ntable += table.select(name_lower=table.name.str.lower())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_lower\nALICE | alice\n```\n::\n::\ncount(sub, start=None, end=None)\nReturns the number of non-overlapping occurrences of substring sub in the range \\[start, end).\nOptional arguments start and end are interpreted as in slice notation.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Hello\n 3 | World\n 4 | Zoo\n'''\n)\ntable += table.select(count=table.name.str.count(\"o\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | count\nAlice | 0\nHello | 1\nWorld | 1\nZoo | 2\n```\n::\n::\nendswith(suffix)\nReturns True if the string ends with suffix.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass 
pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(ends_with_e=table.name.str.endswith(\"e\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | ends_with_e\nAlice | True\nBob | False\nCAROLE | False\ndavid | False\n```\n::\n::\nfind(sub, start=None, end=None)\nReturn the lowest index in the string where substring sub is found within\nthe slice s\\[start:end\\]. Optional arguments start and end are interpreted as in\nslice notation. 
Return -1 if sub is not found.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Hello\n 3 | World\n 4 | Zoo\n'''\n)\ntable += table.select(pos=table.name.str.find(\"o\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | pos\nAlice | -1\nHello | 4\nWorld | 1\nZoo | 1\n```\n::\n::\nlen()\nReturns the length of a string.\n* Returns\n Length of the string\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = 
table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(length=table.name.str.len())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | length\nAlice | 5\nBob | 3\nCAROLE | 6\ndavid | 5\n```\n::\n::\nlower()\nReturns a lowercase copy of a string.\n* Returns\n Lowercase string\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | 
david\n'''\n)\ntable += table.select(name_lower=table.name.str.lower())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_lower\nAlice | alice\nBob | bob\nCAROLE | carole\ndavid | david\n```\n::\n::\nparse_bool(true_values=['on', 'true', 'yes', '1'], false_values=['off', 'false', 'no', '0'], optional=False)\nParses the string to bool, by checking if given string is either in\ntrue_values or false_values. The given string and all values in true_vales and\nfalse_values are made lowercase, so parsing is case insensitive.\nWhen true_values and false_values arguments are\nnot provided, strings \u201cTrue\u201d, \u201cOn\u201d, \u201c1\u201d and \u201cYes\u201d are interpreted as True value,\nand \u201cFalse\u201d, \u201cOff\u201d, \u201c0\u201d, and \u201cNo\u201d are interpreted as False.\nIf true_values or false_values is provided, then these values are mapped to\nrespectively True and False, while all other either raise an exception or return\nNone, depending on argument optional.\nIf optional argument is set to True, then the\nreturn type is Optional\\[bool\\] and if some string cannot be parsed, None is\nreturned.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time 
zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\ndf = pd.DataFrame({\"a\": [\"0\", \"TRUE\", \"on\"]}, dtype=str)\ntable = pw.debug.table_from_pandas(df)\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\na\n0\nTRUE\non\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = 
table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = table.select(a=table.a.str.parse_bool())\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\na\nFalse\nTrue\nTrue\n```\n::\n::\nparse_float(optional=False)\nParses the string to float. 
If optional argument is set to True, then the\nreturn type is Optional\\[float\\] and if some string cannot be parsed, None is\nreturned.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\ndf = pd.DataFrame({\"a\": [\"-5\", \"0.1\", \"200.999\"]}, dtype=str)\ntable = pw.debug.table_from_pandas(df)\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account 
time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = table.select(a=table.a.str.parse_float())\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\na\n-5.0\n0.1\n200.999\n```\n::\n::\nparse_int(optional=False)\nParses the string to int. 
If optional argument is set to True, then the\nreturn type is Optional\\[int\\] and if some string cannot be parsed, None is\nreturned.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\ndf = pd.DataFrame({\"a\": [\"-5\", \"0\", \"200\"]}, dtype=str)\ntable = pw.debug.table_from_pandas(df)\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time 
zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = table.select(a=table.a.str.parse_int())\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\na\n-5\n0\n200\n```\n::\n::\nremoveprefix(prefix, /)\nIf the string starts with prefix, returns a copy of the string without the prefix.\nOtherwise returns the original string.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = 
pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(without_da=table.name.str.removeprefix(\"da\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | without_da\nAlice | Alice\nBob | Bob\nCAROLE | CAROLE\ndavid | vid\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called 
on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | note | prefix\n 1 | AAA | A\n 2 | BB | B\n'''\n)\ntable = table.select(\n pw.this.note,\n new_note=pw.this.note.str.removeprefix(pw.this.prefix)\n)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nnote | new_note\nAAA | AA\nBB | B\n```\n::\n::\nremovesuffix(suffix, /)\nIf the string ends with suffix, returns a copy of the string without the suffix.\nOtherwise returns the original string.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(without_LE=table.name.str.removesuffix(\"LE\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | without_LE\nAlice | Alice\nBob | Bob\nCAROLE | CARO\ndavid | david\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing 
methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | fruit | suffix\n 1 | bamboo | o\n 2 | banana | na\n'''\n)\ntable = table.select(\n pw.this.fruit,\n fruit_cropped=pw.this.fruit.str.removesuffix(pw.this.suffix)\n)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nfruit | fruit_cropped\nbamboo | bambo\nbanana | bana\n```\n::\n::\nreplace(old_value, new_value, count=-1, /)\nReturns the a string where the occurrences of the old_value substrings are\n replaced by the new_value substring.\n* Parameters\n count (`ColumnExpression` | `int`) \u2013 Maximum number of occurrences to replace. When set to -1, replaces\n all occurrences. 
Defaults to -1.\n* Returns\n The new string where old_value is replaced by new_value\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n 5 | Edward\n'''\n)\ntable += table.select(name_replace=table.name.str.replace(\"d\",\"Z\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_replace\nAlice | Alice\nBob | Bob\nCAROLE | CAROLE\nEdward | EZwarZ\ndavid | ZaviZ\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = 
table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | value | old | new | count\n 1 | Scaciscics | c | t | 3\n 2 | yelliwwiid | i | o | 2\n'''\n)\ntable = table.select(\n pw.this.value,\n value_replace=pw.this.value.str.replace(\n pw.this.old, pw.this.new, pw.this.count\n )\n)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nvalue | value_replace\nScaciscics | Statistics\nyelliwwiid | yellowwoid\n```\n::\n::\nreversed()\nReturns a reverse copy of a string.\n* Returns\n Reverse string\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as 
pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(name_reverse=table.name.str.reversed())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_reverse\nAlice | ecilA\nBob | boB\nCAROLE | ELORAC\ndavid | divad\n```\n::\n::\nrfind(sub, start=None, end=None)\nReturn the highest index in the string where substring sub is found within\nthe slice s\\[start:end\\]. Optional arguments start and end are interpreted as in\nslice notation. Return -1 if sub is not found.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Hello\n 3 | World\n 4 | Zoo\n'''\n)\ntable += table.select(pos=table.name.str.rfind(\"o\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | pos\nAlice | -1\nHello | 4\nWorld | 1\nZoo | 2\n```\n::\n::\nslice(start, end, /)\nReturn a slice of the string.\nExample:\n"} -{"doc": "---\ntitle: Expressions 
API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(slice=table.name.str.slice(1,4))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | slice\nAlice | lic\nBob | ob\nCAROLE | ARO\ndavid | avi\n```\n::\n::\nstartswith(prefix)\nReturns True if the string starts with prefix.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account 
time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(starts_with_A=table.name.str.startswith(\"A\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | starts_with_A\nAlice | True\nBob | False\nCAROLE | False\ndavid | False\n```\n::\n::\nstrip(chars=None)\nReturns a copy of the string with specified leading and trailing characters\nremoved. If no arguments are passed, remove the leading and trailing whitespaces.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += 
table.select(name_strip=table.name.str.strip(\"Aod\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_strip\nAlice | lice\nBob | Bob\nCAROLE | CAROLE\ndavid | avi\n```\n::\n::\nswapcase()\nReturns a copy of the string where the case is inverted.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(name_swap=table.name.str.swapcase())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_swap\nAlice | aLICE\nBob | bOB\nCAROLE | carole\ndavid | DAVID\n```\n::\n::\ntitle()\nReturns a copy of the string where where words start with an uppercase character\nand the remaining characters are lowercase.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt 
attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | col\n 1 | title\n'''\n)\ntable = table.select(col_title=table[\"col\"].str.title())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ncol_title\nTitle\n```\n::\n::\nupper()\nReturns a uppercase copy of a string.\n* Returns\n Uppercase string\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending 
on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(name_upper=table.name.str.upper())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_upper\nAlice | ALICE\nBob | BOB\nCAROLE | CAROLE\ndavid | DAVID\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(10).as_integer_ratio()\n```\n::\nResult\n```\n(10, 1)\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(-10).as_integer_ratio()\n```\n::\nResult\n```\n(-10, 1)\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, 
boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(0).as_integer_ratio()\n```\n::\nResult\n```\n(0, 1)\n```\n::\n::\nbit_count()\nNumber of ones in the binary representation of the absolute value of self.\nAlso known as the population count.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\nbin(13)\n```\n::\nResult\n```\n'0b1101'\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(13).bit_count()\n```\n::\nResult\n```\n3\n```\n::\n::\nbit_length()\nNumber of bits necessary to represent self in binary.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, 
boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\nbin(37)\n```\n::\nResult\n```\n'0b100101'\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(37).bit_length()\n```\n::\nResult\n```\n6\n```\n::\n::\nconjugate()\nReturns self, the complex conjugate of any int.\ndenominator()\nthe denominator of a rational number in lowest terms\nfrom_bytes(byteorder='big', *, signed=False)\nReturn the integer represented by the given array of bytes.\nbytes\n Holds the array of bytes to convert. The argument must either\n support the buffer protocol or be an iterable object producing bytes.\n Bytes and bytearray are examples of built-in objects that support the\n buffer protocol.\nbyteorder\n The byte order used to represent the integer. If byteorder is \u2018big\u2019,\n the most significant byte is at the beginning of the byte array. If\n byteorder is \u2018little\u2019, the most significant byte is at the end of the\n byte array. To request the native byte order of the host system, use\n ```\n `\n ```\n sys.byteorder\u2019 as the byte order value. 
Default is to use \u2018big\u2019.\nsigned\n Indicates whether two\u2019s complement is used to represent the integer.\nimag()\nthe imaginary part of a complex number\nnumerator()\nthe numerator of a rational number in lowest terms\nreal()\nthe real part of a complex number\nto_bytes(length=1, byteorder='big', *, signed=False)\nReturn an array of bytes representing an integer.\nlength\n Length of bytes object to use. An OverflowError is raised if the\n integer is not representable with the given number of bytes. Default\n is length 1.\nbyteorder\n The byte order used to represent the integer. If byteorder is \u2018big\u2019,\n the most significant byte is at the beginning of the byte array. If\n byteorder is \u2018little\u2019, the most significant byte is at the end of the\n byte array. To request the native byte order of the host system, use\n ```\n `\n ```\n sys.byteorder\u2019 as the byte order value. Default is to use \u2018big\u2019.\nsigned\n Determines whether two\u2019s complement is used to represent the integer.\n If signed is False and a negative integer is given, an OverflowError\n is raised.\nclass pw.ml.smart_table_ops.FuzzyJoinNormalization(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive 
denominator.\n```python\n(10).as_integer_ratio()\n```\n::\nResult\n```\n(10, 1)\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(-10).as_integer_ratio()\n```\n::\nResult\n```\n(-10, 1)\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(0).as_integer_ratio()\n```\n::\nResult\n```\n(0, 1)\n```\n::\n::\nbit_count()\nNumber of ones in the binary representation of the absolute value of self.\nAlso known as the population count.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive 
denominator.\n```python\nbin(13)\n```\n::\nResult\n```\n'0b1101'\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(13).bit_count()\n```\n::\nResult\n```\n3\n```\n::\n::\nbit_length()\nNumber of bits necessary to represent self in binary.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\nbin(37)\n```\n::\nResult\n```\n'0b100101'\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(37).bit_length()\n```\n::\nResult\n```\n6\n```\n::\n::\nconjugate()\nReturns self, the complex conjugate of any int.\ndenominator()\nthe 
denominator of a rational number in lowest terms\nfrom_bytes(byteorder='big', *, signed=False)\nReturn the integer represented by the given array of bytes.\nbytes\n Holds the array of bytes to convert. The argument must either\n support the buffer protocol or be an iterable object producing bytes.\n Bytes and bytearray are examples of built-in objects that support the\n buffer protocol.\nbyteorder\n The byte order used to represent the integer. If byteorder is \u2018big\u2019,\n the most significant byte is at the beginning of the byte array. If\n byteorder is \u2018little\u2019, the most significant byte is at the end of the\n byte array. To request the native byte order of the host system, use\n ```\n `\n ```\n sys.byteorder\u2019 as the byte order value. Default is to use \u2018big\u2019.\nsigned\n Indicates whether two\u2019s complement is used to represent the integer.\nimag()\nthe imaginary part of a complex number\nnumerator()\nthe numerator of a rational number in lowest terms\nreal()\nthe real part of a complex number\nto_bytes(length=1, byteorder='big', *, signed=False)\nReturn an array of bytes representing an integer.\nlength\n Length of bytes object to use. An OverflowError is raised if the\n integer is not representable with the given number of bytes. Default\n is length 1.\nbyteorder\n The byte order used to represent the integer. If byteorder is \u2018big\u2019,\n the most significant byte is at the beginning of the byte array. If\n byteorder is \u2018little\u2019, the most significant byte is at the end of the\n byte array. To request the native byte order of the host system, use\n ```\n `\n ```\n sys.byteorder\u2019 as the byte order value. 
Default is to use \u2018big\u2019.\nsigned\n Determines whether two\u2019s complement is used to represent the integer.\n If signed is False and a negative integer is given, an OverflowError\n is raised.\nclass pw.ml.smart_table_ops.JoinResult()\nclass pw.ml.smart_table_ops.Node()\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice 1\n2 9 Bob 1\n3 8 Alice 2\n''')\nt2 = pw.debug.table_from_markdown('''\n age owner pet size\n11 10 Alice 3 M\n12 9 Bob 1 L\n13 8 Tom 1 XL\n''')\njoinresult= t1.join(t2, t1.pet == t2.pet, t1.owner == t2.owner) # noqa: E501\nisinstance(joinresult, pw.JoinResult)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\npw.debug.compute_and_print(joinresult.select(t1.age, t2.size), include_id=False)\n```\n::\nResult\n```\nage | size\n9 | L\n```\n::\n::\nproperty C(: ColumnNamespace )\nReturns the namespace of all the columns of a joinable.\nAllows accessing column names that might otherwise be a reserved methods.\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join 
API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\ntab = pw.debug.table_from_markdown('''\nage | owner | pet | filter\n10 | Alice | dog | True\n9 | Bob | dog | True\n8 | Alice | cat | False\n7 | Bob | dog | True\n''')\nisinstance(tab.C.age, pw.ColumnReference)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\npw.debug.compute_and_print(tab.filter(tab.C.filter), include_id=False)\n```\n::\nResult\n```\nage | owner | pet | filter\n7 | Bob | dog | True\n9 | Bob | dog | True\n10 | Alice | dog | True\n```\n::\n::\nfilter(filter_expression)\nFilters rows, keeping the ones satisfying the predicate.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice 1\n2 9 
Bob 1\n3 8 Alice 2\n''')\nt2 = pw.debug.table_from_markdown('''\n age owner pet size\n11 10 Alice 3 M\n12 9 Bob 1 L\n13 8 Tom 1 XL\n''')\nresult = t1.join(t2).filter(t1.owner == t2.owner).select(t1.age, t2.size) # noqa: E501\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nage | size\n8 | M\n9 | L\n10 | M\n```\n::\n::\ngroupby(*args, id=None)\nGroups join result by columns from args.\nNOTE: Usually followed by .reduce() that aggregates the result and returns a table.\n* Parameters\n * args (`ColumnReference`) \u2013 columns to group by.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 if provided, is the column used to set id\u2019s of the rows of the result\n* Returns\n *GroupedJoinResult* \u2013 Groupby object.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n cost owner pet\n1 100 Alice 1\n2 90 Bob 1\n3 80 Alice 2\n''')\nt2 = pw.debug.table_from_markdown('''\n cost owner pet size\n11 100 Alice 3 M\n12 90 Bob 1 L\n13 80 Tom 1 XL\n''')\nresult = (t1.join(t2, t1.owner==t2.owner).groupby(pw.this.owner)\n .reduce(pw.this.owner, pairs = pw.reducers.count()))\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nowner | pairs\nAlice | 2\nBob | 1\n```\n::\n::\njoin(other, *on, id=None, how=JoinMode.INNER)\nJoin self with other using the given join expression.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n * how (`JoinMode`) \u2013 by default, inner join is performed. Possible values are JoinMode.{INNER,LEFT,RIGHT,OUTER}\n correspond to inner, left, right and outer join respectively.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(\n t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER\n).select(age=t1.age, owner_name=t2.owner, size=t2.size)\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_inner(other, *on, id=None)\nInner-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER).select(age=t1.age, owner_name=t2.owner, size=t2.size) # noqa: E501\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_left(other, *on, id=None)\nLeft-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks:\nargs cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on 
the right are replaced with None\n- rows from the right side that were not matched with the left side are skipped\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_left(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_outer(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on the right are replaced with None\n- for rows from the right side that 
were not matched with the left side,\nmissing values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_outer(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t1.id, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_right(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- rows from the left side that were not matched with the right side are skipped\n- for rows from the right side that were not matched with the left side,\nmissing 
values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_right(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(pw.coalesce(t1.b,0) + t2.d,t1.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n```\n::\n::\n* Returns\n OuterJoinResult object\npromise_universe_is_equal_to(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the others.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n None\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, 
_join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nimport pytest\nt1 = pw.debug.table_from_markdown(\n '''\n | age | owner | pet\n1 | 8 | Alice | cat\n2 | 9 | Bob | dog\n3 | 15 | Alice | tortoise\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | age | owner\n1 | 11 | Alice\n2 | 12 | Tom\n3 | 7 | Eve\n'''\n)\nt3 = t2.filter(pw.this.age > 10)\nwith pytest.raises(\n ValueError,\n match='Universe of the argument of Table.update_cells\\(\\) needs ' # noqa\n + 'to be a subset of the universe of the updated table.',\n):\n t1.update_cells(t3)\nt1 = t1.promise_universe_is_equal_to(t2)\nresult = t1.update_cells(t3)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n11 | Alice | cat\n12 | Tom | dog\n15 | Alice | tortoise\n```\n::\n::\npromise_universe_is_subset_of(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the other.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 30\n''').promise_universe_is_subset_of(t1)\nt3 = t1 << t2\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 
30\n```\n::\n::\npromise_universes_are_disjoint(other)\nAsserts to Pathway that an universe of self is disjoint from universe of other.\nSemantics: Used in situations where Pathway cannot deduce universes are disjoint.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n11 | 11 | Alice | 30\n12 | 12 | Tom | 40\n''').promise_universes_are_disjoint(t1)\nt3 = t1.concat(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n11 | Alice | 30\n12 | Tom | 40\n```\n::\n::\nreduce(*args, kwargs)\nReduce a join result to a single row.\nEquivalent to self.groupby().reduce(\\*args, \\*\\*kwargs).\n* Parameters\n * args (`ColumnReference`) \u2013 reducer to reduce the table with\n * kwargs (`ColumnExpression`) \u2013 reducer to reduce the table with. 
Its key is the new name of a column.\n* Returns\n *Table* \u2013 Reduced table.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n cost owner pet\n1 100 Alice 1\n2 90 Bob 1\n3 80 Alice 2\n''')\nt2 = pw.debug.table_from_markdown('''\n cost owner pet size\n11 100 Alice 3 M\n12 90 Bob 1 L\n13 80 Tom 1 XL\n''')\nresult = t1.join(t2, t1.owner==t2.owner).reduce(total_pairs = pw.reducers.count())\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\ntotal_pairs\n3\n```\n::\n::\nselect(*args, kwargs)\nComputes result of a join.\n* Parameters\n * args (`ColumnReference`) \u2013 Column references.\n * kwargs (`Any`) \u2013 Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, t1.pet == 
t2.pet, t1.owner == t2.owner).select(age=t1.age, owner_name=t2.owner, size=t2.size) # noqa: E501\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\nclass pw.Joinable(context)\nproperty C(: ColumnNamespace )\nReturns the namespace of all the columns of a joinable.\nAllows accessing column names that might otherwise be a reserved methods.\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\ntab = pw.debug.table_from_markdown('''\nage | owner | pet | filter\n10 | Alice | dog | True\n9 | Bob | dog | True\n8 | Alice | cat | False\n7 | Bob | dog | True\n''')\nisinstance(tab.C.age, pw.ColumnReference)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\npw.debug.compute_and_print(tab.filter(tab.C.filter), include_id=False)\n```\n::\nResult\n```\nage | owner | pet | filter\n7 | Bob | dog | True\n9 | Bob | dog | True\n10 | Alice | dog | True\n```\n::\n::\njoin(other, *on, id=None, how=JoinMode.INNER)\nJoin self with other using the given join expression.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the 
join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n * how (`JoinMode`) \u2013 by default, inner join is performed. Possible values are JoinMode.{INNER,LEFT,RIGHT,OUTER}\n correspond to inner, left, right and outer join respectively.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(\n t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER\n).select(age=t1.age, owner_name=t2.owner, size=t2.size)\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_inner(other, *on, id=None)\nInner-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER).select(age=t1.age, owner_name=t2.owner, size=t2.size) # noqa: E501\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_left(other, *on, id=None)\nLeft-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks:\nargs cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on 
the right are replaced with None\n- rows from the right side that were not matched with the left side are skipped\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_left(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_outer(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on the right are replaced with None\n- for rows from the right side that 
were not matched with the left side,\nmissing values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_outer(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t1.id, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_right(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- rows from the left side that were not matched with the right side are skipped\n- for rows from the right side that were not matched with the left side,\nmissing 
values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_right(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(pw.coalesce(t1.b,0) + t2.d,t1.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n```\n::\n::\n* Returns\n OuterJoinResult object\npromise_universe_is_equal_to(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the others.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n None\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, 
_join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nimport pytest\nt1 = pw.debug.table_from_markdown(\n '''\n | age | owner | pet\n1 | 8 | Alice | cat\n2 | 9 | Bob | dog\n3 | 15 | Alice | tortoise\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | age | owner\n1 | 11 | Alice\n2 | 12 | Tom\n3 | 7 | Eve\n'''\n)\nt3 = t2.filter(pw.this.age > 10)\nwith pytest.raises(\n ValueError,\n match='Universe of the argument of Table.update_cells\\(\\) needs ' # noqa\n + 'to be a subset of the universe of the updated table.',\n):\n t1.update_cells(t3)\nt1 = t1.promise_universe_is_equal_to(t2)\nresult = t1.update_cells(t3)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n11 | Alice | cat\n12 | Tom | dog\n15 | Alice | tortoise\n```\n::\n::\npromise_universe_is_subset_of(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the other.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 30\n''').promise_universe_is_subset_of(t1)\nt3 = t1 << t2\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 
30\n```\n::\n::\npromise_universes_are_disjoint(other)\nAsserts to Pathway that an universe of self is disjoint from universe of other.\nSemantics: Used in situations where Pathway cannot deduce universes are disjoint.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n11 | 11 | Alice | 30\n12 | 12 | Tom | 40\n''').promise_universes_are_disjoint(t1)\nt3 = t1.concat(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n11 | Alice | 30\n12 | Tom | 40\n```\n::\n::\n"} -{"doc": "---\ntitle: Groupby API\nsidebar: 'API'\nnavigation: true\n---\n# Groupby API\nContains reference for helper classes related to groupby.\nclass pw.GroupedJoinResult(*, join_result, args, id)\nreduce(*args, kwargs)\nReduces grouped join result to table.\n* Returns\n *Table* \u2013 Created table.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n cost owner pet\n1 100 Alice 1\n2 90 Bob 1\n3 80 Alice 2\n''')\nt2 = pw.debug.table_from_markdown('''\n cost owner pet size\n11 100 Alice 3 M\n12 90 Bob 1 L\n13 80 Tom 1 XL\n''')\nresult = (t1.join(t2, t1.owner==t2.owner).groupby(pw.this.owner)\n .reduce(pw.this.owner, pairs = pw.reducers.count()))\npw.debug.compute_and_print(result, 
include_id=False)\n```\n::\nResult\n```\nowner | pairs\nAlice | 2\nBob | 1\n```\n::\n::\nclass pw.GroupedJoinable(_universe, _substitution, _joinable)\nclass pw.GroupedTable(table, grouping_columns, set_id=False, sort_by=None, _filter_out_results_of_forgetting=False)\nResult of a groupby operation on a Table.\nExample:\n"} -{"doc": "---\ntitle: Groupby API\nsidebar: 'API'\nnavigation: true\n---\n# Groupby API\nContains reference for helper classes related to groupby.\nclass pw.GroupedJoinResult(*, join_result, args, id)\nreduce(*args, kwargs)\nReduces grouped join result to table.\n* Returns\n *Table* \u2013 Created table.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.groupby(t1.pet, t1.owner)\nisinstance(t2, pw.GroupedTable)\n```\n::\nResult\n```\nTrue\n```\n::\n::\nreduce(*args, kwargs)\nReduces grouped table to a table.\n* Parameters\n * args (`ColumnReference`) \u2013 Column references.\n * kwargs (`ColumnExpression`) \u2013 Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n"} -{"doc": "---\ntitle: Groupby API\nsidebar: 'API'\nnavigation: true\n---\n# Groupby API\nContains reference for helper classes related to groupby.\nclass pw.GroupedJoinResult(*, join_result, args, id)\nreduce(*args, kwargs)\nReduces grouped join result to table.\n* Returns\n *Table* \u2013 Created table.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.groupby(t1.pet, t1.owner).reduce(t1.owner, t1.pet, ageagg=pw.reducers.sum(t1.age))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | pet | ageagg\nAlice | cat | 8\nAlice | dog | 10\nBob | dog | 16\n```\n::\n::\nclass pw.ReducerExpressionSplitter()\n"} -{"doc": 
"pathway.stdlib.utils.pandas_transformer module\npw.utils.pandas_transformer.pandas_transformer(output_schema, output_universe=None)\nDecorator that turns python function operating on pandas.DataFrame into pathway transformer.\nInput universes are converted into input DataFrame indexes.\nThe resulting index is treated as the output universe, so it must maintain uniqueness\nand be of integer type.\n* Parameters\n * output_schema (`type`\\[`Schema`\\]) \u2013 Schema of a resulting table.\n * output_universe (`UnionType`\\[`str`, `int`, `None`\\]) \u2013 Index or name of an argument whose universe will be used in resulting table. Defaults to None.\n* Returns\n Transformer that can be applied on Pathway tables.\nExample:\nCode\n```python\nimport pathway as pw\ninput = pw.debug.table_from_markdown(\n '''\n | foo | bar\n0 | 10 | 100\n1 | 20 | 200\n2 | 30 | 300\n'''\n)\nclass Output(pw.Schema):\n sum: int\n@pw.pandas_transformer(output_schema=Output)\ndef sum_cols(t: pd.DataFrame) -> pd.DataFrame:\n return pd.DataFrame(t.sum(axis=1))\noutput = sum_cols(input)\npw.debug.compute_and_print(output, include_id=False)\n```\n::\nResult\n```\nsum\n110\n220\n330\n```\n::\n::\n"} -{"doc": "pathway.stdlib.utils.col module\nFunctions\npw.utils.col.apply_all_rows(*cols, fun, result_col_name)\nApplies a function to all the data in selected columns at once, returning a single column.\nThis transformer is meant to be run infrequently on a relativelly small tables.\nInput:\n- cols: list of columns to which function will be applied\n- fun: function taking lists of columns and returning a corresponding list of outputs.\n- result_col_name: name of the output column\nOutput:\n- Table indexed with original indices with a single column named by \u201cresult_col_name\u201d argument\ncontaining results of the apply\nExample:\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n'''\n | colA | colB\n1 | 1 | 10\n2 | 2 | 20\n3 | 3 | 30\n''')\ndef add_total_sum(col1, col2):\n 
sum_all = sum(col1) + sum(col2)\n return [x + sum_all for x in col1]\nresult = pw.utils.col.apply_all_rows(\n table.colA, table.colB, fun=add_total_sum, result_col_name=\"res\"\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nres\n67\n68\n69\n```\n::\n::\npw.utils.col.flatten_column(column, origin_id=.origin_id)\nDeprecated: use pw.Table.flatten instead.\nFlattens a column of a table.\nInput:\n- column: Column expression of column to be flattened\n- origin_id: name of output column where to store id\u2019s of input rows\nOutput:\n- Table with columns: colname_to_flatten and origin_id (if not None)\n"} -{"doc": "pathway.stdlib.utils.async_transformer module\nclass pw.utils.async_transformer.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\nCode\n```python\nimport pathway as pw\nimport asyncio\nclass OutputSchema(pw.Schema):\n ret: int\nclass AsyncIncrementTransformer(pw.AsyncTransformer, output_schema=OutputSchema):\n async def invoke(self, value) -> Dict[str, Any]:\n await asyncio.sleep(0.1)\n return {\"ret\": value + 1 }\ninput = pw.debug.table_from_markdown('''\n | value\n1 | 42\n2 | 44\n''')\nresult = AsyncIncrementTransformer(input_table=input).result\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nret\n43\n45\n```\n::\n::\nclose()\nCalled once at the end. Proper place for cleanup.\nabstract async invoke(*args, kwargs)\nCalled for every row of input_table. The arguments will correspond to the\ncolumns in the input table.\nShould return dict of values matching `output_schema`.\nopen()\nCalled before actual work. 
Suitable for one time setup.\nproperty result(: Table )\nResulting table.\nwith_options(capacity=None, retry_strategy=None, cache_strategy=None)\nSets async options.\n* Parameters\n * capacity (`Optional`\\[`int`\\]) \u2013 maximum number of concurrent operations.\n * retry_strategy (`Optional`\\[`AsyncRetryStrategy`\\]) \u2013 defines how failures will be handled.\n* Returns\n self\n"} -{"doc": "Example\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | pet | age\n1 | Dog | 2\n7 | Cat | 5\n''')\nt2 = pw.utils.col.flatten_column(t1.pet)\npw.debug.compute_and_print(t2.without(pw.this.origin_id), include_id=False)\n```\n::\nResult\n```\npet\nC\nD\na\ng\no\nt\n```\n::\n::\npw.utils.col.groupby_reduce_majority(column_group, column_val)\nFinds a majority in column_val for every group in column_group.\nWorkaround for missing majority reducer.\nExample:\n"} -{"doc": "Example\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n'''\n | group | vote\n0 | 1 | pizza\n1 | 1 | pizza\n2 | 1 | hotdog\n3 | 2 | hotdog\n4 | 2 | pasta\n5 | 2 | pasta\n6 | 2 | pasta\n''')\nresult = pw.utils.col.groupby_reduce_majority(table.group, table.vote)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\ngroup | majority\n1 | pizza\n2 | pasta\n```\n::\n::\npw.utils.col.multiapply_all_rows(*cols, fun, result_col_names)\nApplies a function to all the data in selected columns at once, returning multiple columns.\nThis transformer is meant to be run infrequently on a relativelly small tables.\nInput:\n- cols: list of columns to which function will be applied\n- fun: function taking lists of columns and returning a corresponding list of outputs.\n- result_col_names: names of the output columns\nOutput:\n- Table indexed with original indices with columns named by \u201cresult_col_names\u201d argument\ncontaining results of the apply\nExample:\n"} -{"doc": "Example\n```python\nimport pathway as pw\ntable = 
pw.debug.table_from_markdown(\n'''\n | colA | colB\n1 | 1 | 10\n2 | 2 | 20\n3 | 3 | 30\n''')\ndef add_total_sum(col1, col2):\n sum_all = sum(col1) + sum(col2)\n return [x + sum_all for x in col1], [x + sum_all for x in col2]\nresult = pw.utils.col.multiapply_all_rows(\n table.colA, table.colB, fun=add_total_sum, result_col_names=[\"res1\", \"res2\"]\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nres1 | res2\n67 | 76\n68 | 86\n69 | 96\n```\n::\n::\npw.utils.col.unpack_col(column, *unpacked_columns, schema=None)\nUnpacks multiple columns from a single column.\nArguments unpacked_columns and schema are mutually exclusive\nInput:\n- column: Column expression of column containing some sequences\n- unpacked_columns: list of names of output columns\n- schema: Schema of new columns\nOutput:\n- Table with columns named by \u201cunpacked_columns\u201d argument\nExamples:\n"} -{"doc": "Example\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n'''\n | colA | colB | colC\n1 | Alice | 25 | dog\n2 | Bob | 32 | cat\n3 | Carole | 28 | dog\n''')\nt2 = t1.select(user = pw.make_tuple(pw.this.colA, pw.this.colB, pw.this.colC))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nuser\n('Alice', 25, 'dog')\n('Bob', 32, 'cat')\n('Carole', 28, 'dog')\n```\n::\n::\n"} -{"doc": "Example\n```python\nclass SomeSchema(pw.Schema):\n name: str\n age: int\n pet: str\nunpack_table = pw.utils.col.unpack_col(t2.user, schema=SomeSchema)\npw.debug.compute_and_print(unpack_table, include_id=False)\n```\n::\nResult\n```\nname | age | pet\nAlice | 25 | dog\nBob | 32 | cat\nCarole | 28 | dog\n```\n::\n::"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of 
an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport asyncio\nclass OutputSchema(pw.Schema):\n ret: int\nclass AsyncIncrementTransformer(pw.AsyncTransformer, output_schema=OutputSchema):\n async def invoke(self, value) -> Dict[str, Any]:\n await asyncio.sleep(0.1)\n return {\"ret\": value + 1 }\ninput = pw.debug.table_from_markdown('''\n | value\n1 | 42\n2 | 44\n''')\nresult = AsyncIncrementTransformer(input_table=input).result\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nret\n43\n45\n```\n::\n::\nclose()\nCalled once at the end. Proper place for cleanup.\nabstract async invoke(*args, kwargs)\nCalled for every row of input_table. The arguments will correspond to the\ncolumns in the input table.\nShould return dict of values matching `output_schema`.\nopen()\nCalled before actual work. Suitable for one time setup.\nwith_options(capacity=None, retry_strategy=None, cache_strategy=None)\nSets async options.\n* Parameters\n * capacity (`Optional`\\[`int`\\]) \u2013 maximum number of concurrent operations.\n * retry_strategy (`Optional`\\[`AsyncRetryStrategy`\\]) \u2013 defines how failures will be handled.\n* Returns\n self\nproperty result(: Table )\nResulting table.\nclass pw.BaseCustomAccumulator()\nUtility class for defining custom accumulators, used for custom reducers.\nCustom accumulators should inherit from this class, and should implement from_row,\nupdate and compute_result. 
Optionally neutral and retract can be provided\nfor more efficient processing on streams with changing data.\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nclass CustomAvgAccumulator(pw.BaseCustomAccumulator):\n def __init__(self, sum, cnt):\n self.sum = sum\n self.cnt = cnt\n @classmethod\n def from_row(self, row):\n [val] = row\n return CustomAvgAccumulator(val, 1)\n def update(self, other):\n self.sum += other.sum\n self.cnt += other.cnt\n def compute_result(self) -> float:\n return self.sum / self.cnt\nimport sys; sys.modules[__name__].CustomAvgAccumulator = CustomAvgAccumulator # NOSHOW\ncustom_avg = pw.reducers.udf_reducer(CustomAvgAccumulator)\nt1 = pw.debug.parse_to_table('''\nage | owner | pet | price\n10 | Alice | dog | 100\n9 | Bob | cat | 80\n8 | Alice | cat | 90\n7 | Bob | dog | 70\n''')\nt2 = t1.groupby(t1.owner).reduce(t1.owner, avg_price=custom_avg(t1.price))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | avg_price\nAlice | 95.0\nBob | 75.0\n```\n::\n::\nabstract compute_result()\nMandatory function to finalize computation.\nUsed to extract answer from final state of accumulator.\nNarrowing the type of this function helps better type the output of the reducer.\nabstract classmethod from_row(row)\nConstruct the accumulator from a row of data.\nRow will be passed as a list of values.\nThis is a mandatory function.\nclassmethod neutral()\nNeutral element of the accumulator (aggregation of an empty list).\nThis function is optional, and allows for more efficient processing on streams\nwith changing data.\nretract(other)\nUpdate the 
accumulator by removing the value of another one.\nThis function is optional, and allows more efficient reductions on streams\nwith changing data.\nabstract update(other)\nUpdate the accumulator with another one.\nMethod does not need to return anything, the change should be in-place.\nThis is a mandatory function.\nclass pw.ClassArg(ref: RowReference, ptr: Pointer)\nBase class to inherit from when writing inner classes for class transformers.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n arg = pw.input_attribute()\n @pw.output_attribute\n def ret(self) -> int:\n return self.arg + 1\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7\n''')\nt2 = simple_transformer(table=t1.select(arg=t1.age)).table\npw.debug.compute_and_print(t1 + t2, include_id=False)\n```\n::\nResult\n```\nage | ret\n7 | 8\n8 | 9\n9 | 10\n10 | 11\n```\n::\n::\npointer_from(*args, optional=False)\nPseudo-random hash of its argument. Produces pointer types. 
Applied value-wise.\nclass pw.ColumnExpression()\nas_bool()\nConverts value to a bool or None if not possible.\nCurrently works for Json columns only.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\nclass InputSchema(pw.Schema):\n data: dict\ndt = pd.DataFrame(data={\"data\": [{\"value\": True}, {\"value\": False}]})\ntable = pw.debug.table_from_pandas(dt, schema=InputSchema)\nresult = table.select(result=pw.this.data.get(\"value\").as_bool())\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nresult\nFalse\nTrue\n```\n::\n::\nas_float()\nConverts value to a float or None if not possible.\nCurrently works for Json columns only.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\nclass InputSchema(pw.Schema):\n data: dict\ndt = pd.DataFrame(data={\"data\": [{\"value\": 1.5}, {\"value\": 3.14}]})\ntable = pw.debug.table_from_pandas(dt, schema=InputSchema)\nresult = table.select(result=pw.this.data.get(\"value\").as_float())\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nresult\n1.5\n3.14\n```\n::\n::\nas_int()\nConverts value to an int or None if not possible.\nCurrently works for 
Json columns only.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\nclass InputSchema(pw.Schema):\n data: dict\ndt = pd.DataFrame(data={\"data\": [{\"value\": 1}, {\"value\": 2}]})\ntable = pw.debug.table_from_pandas(dt, schema=InputSchema)\nresult = table.select(result=pw.this.data.get(\"value\").as_int())\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nresult\n1\n2\n```\n::\n::\nas_str()\nConverts value to a string or None if not possible.\nCurrently works for Json columns only.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\nclass InputSchema(pw.Schema):\n data: dict\ndt = pd.DataFrame(data={\"data\": [{\"value\": \"dog\"}, {\"value\": \"cat\"}]})\ntable = pw.debug.table_from_pandas(dt, schema=InputSchema)\nresult = table.select(result=pw.this.data.get(\"value\").as_str())\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nresult\ncat\ndog\n```\n::\n::\nget(index, default=None)\nExtracts element at index from an object. 
The object has to be a Tuple or Json.\nIf no element is present at index, it returns value specified by a default parameter.\nIndex can be effectively int for Tuple and int or str for Json.\nFor Tuples, using negative index can be used to access elements at the end, moving backwards.\n* Parameters\n * index (`ColumnExpression` | `int` | `str`) \u2013 Position to extract element at.\n * default (`Union`\\[`ColumnExpression`, `None`, `int`, `float`, `str`, `bytes`, `bool`, `Pointer`, `datetime`, `timedelta`, `ndarray`, `Json`, `dict`\\[`str`, `Any`\\], `tuple`\\[`Any`, `...`\\]\\]) \u2013 Value returned when no element is at position index. Defaults to None.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b | c\n1 | 3 | 2 | 2\n2 | 4 | 1 | 0\n3 | 7 | 3 | 1\n'''\n)\nt2 = t1.with_columns(tup=pw.make_tuple(pw.this.a, pw.this.b))\nt3 = t2.select(\n x=pw.this.tup.get(1),\n y=pw.this.tup.get(3),\n z=pw.this.tup.get(pw.this.c),\n t=pw.this.tup.get(pw.this.c, default=100),\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nx | y | z | t\n1 | | 4 | 4\n2 | | | 100\n3 | | 3 | 3\n```\n::\n::\nis_none()\nReturns true if the value is None.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table 
can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | owner | pet\n1 | Alice | dog\n2 | Bob |\n3 | Carol | cat\n''')\nt2 = t1.with_columns(has_no_pet=pw.this.pet.is_none())\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | pet | has_no_pet\nAlice | dog | False\nBob | | True\nCarol | cat | False\n```\n::\n::\nis_not_none()\nReturns true if the value is not None.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | owner | pet\n1 | Alice | dog\n2 | Bob |\n3 | Carol | cat\n''')\nt2 = t1.with_columns(has_pet=pw.this.pet.is_not_none())\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | pet | has_pet\nAlice | dog | True\nBob | | False\nCarol | cat | True\n```\n::\n::\nto_string()\nChanges the values to strings.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nval\n1\n2\n3\n4''')\nt1.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table 
API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t1, include_id=False)\n```\n::\nResult\n```\nval\n1\n2\n3\n4\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt2 = t1.select(val = pw.this.val.to_string())\nt2.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t2.select(val=pw.this.val + \"a\"), include_id=False)\n```\n::\nResult\n```\nval\n1a\n2a\n3a\n4a\n```\n::\n::\nclass pw.ColumnReference(column, table, name)\nReference to the column.\nInherits from ColumnExpression.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = 
pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob dog''')\nisinstance(t1.age, pw.ColumnReference)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nisinstance(t1[\"owner\"], pw.ColumnReference)\n```\n::\nResult\n```\nTrue\n```\n::\n::\nproperty name()\nName of the referred column.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob dog''')\nt1.age.name\n```\n::\nResult\n```\n'age'\n```\n::\n::\nproperty table()\nTable where the referred column belongs to.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob 
dog''')\nt1.age.table is t1\n```\n::\nResult\n```\nTrue\n```\n::\n::\nclass pw.DateTimeNaive(ts_input=, year=None, month=None, day=None, hour=None, minute=None, second=None, microsecond=None, tzinfo=None, *, nanosecond=None, tz=None, unit=None, fold=None)\nclass pw.DateTimeUtc(ts_input=, year=None, month=None, day=None, hour=None, minute=None, second=None, microsecond=None, tzinfo=None, *, nanosecond=None, tz=None, unit=None, fold=None)\nclass pw.Duration(value=, unit=None, kwargs)\nclass pw.Json(_value)\nRepresents JSON values.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\na | b | c\nTrue | 2 | manul\n''')\n@pw.udf\ndef to_json(val) -> pw.Json:\n return pw.Json(val)\nresult = t1.select({c: to_json(pw.this[c]) for c in t1.column_names()})\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\na | b | c\ntrue | 2 | \"manul\"\n```\n::\n::\nclass pw.MonitoringLevel(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nSpecifies a verbosity of Pathway monitoring mechanism.\nALL( = 4 )\nMonitor input connectors and latency for each operator in the execution graph. 
The\nlatency is measured as the difference between the time when the operator processed\nthe data and the time when pathway acquired the data.\nAUTO( = 0 )\nAutomatically sets IN_OUT in an interactive terminal and jupyter notebook.\nSets NONE otherwise.\nAUTO_ALL( = 1 )\nAutomatically sets ALL in an interactive terminal and jupyter notebook.\nSets NONE otherwise.\nIN_OUT( = 3 )\nMonitor input connectors and input and output latency. The latency is measured as\nthe difference between the time when the operator processed the data and the time\nwhen pathway acquired the data.\nNONE( = 2 )\nNo monitoring.\nclass pw.Schema()\nBase class to inherit from when creating schemas.\nAll schemas should be subclasses of this one.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob dog''')\nt1.schema\n```\n::\nResult\n```\n, 'owner': , 'pet': }>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nissubclass(t1.schema, pw.Schema)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and 
functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nclass NewSchema(pw.Schema):\n foo: int\nSchemaSum = NewSchema | t1.schema\nSchemaSum\n```\n::\nResult\n```\n, 'owner': , 'pet': , 'foo': }>\n```\n::\n::\nclass pw.SchemaProperties(append_only=None)\nclass pw.TableSlice(mapping, table)\nCollection of references to Table columns.\nCreated by Table.slice method, or automatically by using left/right/this constructs.\nSupports basic column manipulation methods.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt1.slice.without(\"age\").with_suffix(\"_col\")\n```\n::\nResult\n```\nTableSlice({'owner_col': .owner, 'pet_col': .pet})\n```\n::\n::\nclass pw.iterate_universe(table)\nclass pw.left(*args, kwargs)\nObject for generating column references without holding the actual table in hand.\nNeeds to be evaluated in the proper context.\nFor Table.join() and JoinResult.select(), refers to the left input table.\nFor all other situations, you need pw.this object.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async 
transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, pw.left.pet == pw.right.pet, pw.left.owner == pw.right.owner).select(\n age=pw.left.age, owner_name=pw.right.owner, size=pw.this.size\n )\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\nclass pw.right(*args, kwargs)\nObject for generating column references without holding the actual table in hand.\nNeeds to be evaluated in the proper context.\nFor Table.join() and JoinResult.select(), refers to the right input table.\nFor all other situations, you need pw.this object.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, pw.left.pet == pw.right.pet, pw.left.owner == pw.right.owner).select(\n age=pw.left.age, owner_name=pw.right.owner, size=pw.this.size\n )\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\nclass pw.this(*args, 
kwargs)\nObject for generating column references without holding the actual table in hand.\nNeeds to be evaluated in the proper context.\nFor most of the Table methods, it refers to self.\nFor JoinResult, it refers to the left input table.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | 1\n9 | Bob | 1\n8 | Alice | 2\n''')\nt2 = t1.select(pw.this.owner, pw.this.age)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | age\nAlice | 8\nAlice | 10\nBob | 9\n```\n::\n::\nFunctions\npw.apply(fun, *args, kwargs)\nApplies function to column expressions, column-wise.\nOutput column type deduced from type-annotations of a function.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\ndef concat(left: str, right: str) -> str:\n return left+right\nt1 = pw.debug.table_from_markdown('''\nage owner pet\n 10 Alice dog\n 9 Bob dog\n 8 Alice cat\n 7 Bob dog''')\nt2 = t1.select(col = pw.apply(concat, t1.owner, t1.pet))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncol\nAlicecat\nAlicedog\nBobdog\nBobdog\n```\n::\n::\npw.apply_async(fun, *args, kwargs)\nApplies function 
asynchronously to column expressions, column-wise.\nOutput column type deduced from type-annotations of a function.\nEither a regular or async function can be passed.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport asyncio\nasync def concat(left: str, right: str) -> str:\n await asyncio.sleep(0.1)\n return left+right\nt1 = pw.debug.table_from_markdown('''\nage owner pet\n 10 Alice dog\n 9 Bob dog\n 8 Alice cat\n 7 Bob dog''')\nt2 = t1.select(col = pw.apply_async(concat, t1.owner, t1.pet))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncol\nAlicecat\nAlicedog\nBobdog\nBobdog\n```\n::\n::\npw.apply_with_type(fun, ret_type, *args, kwargs)\nApplies function to column expressions, column-wise.\nOutput column type is provided explicitly.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob dog''')\nt2 = t1.select(col = pw.apply_with_type(lambda left, right: left+right, str, t1.owner, t1.pet))\npw.debug.compute_and_print(t2, 
include_id=False)\n```\n::\nResult\n```\ncol\nAlicecat\nAlicedog\nBobdog\nBobdog\n```\n::\n::\npw.assert_table_has_schema(table, schema, *, allow_superset=True, ignore_primary_keys=True)\nAsserts that the schema of the table is equivalent to the schema given as an argument.\n* Parameters\n * table (`Table`) \u2013 Table for which we are asserting schema.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema, which we assert that the Table has.\n * allow_superset (`bool`) \u2013 if True, the columns of the table can be a superset of columns\n in schema. The default value is True.\n * ignore_primary_keys (`bool`) \u2013 if True, the assert won\u2019t check whether table and schema\n have the same primary keys. The default value is True.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.select(pw.this.owner, age = pw.cast(float, pw.this.age))\nschema = pw.schema_builder(\n {\"age\": pw.column_definition(dtype=float), \"owner\": pw.column_definition(dtype=str)}\n)\npw.assert_table_has_schema(t2, schema)\n```\npw.attribute(func, kwargs)\nDecorator for creation of attributes.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n arg = pw.input_attribute()\n @pw.attribute\n def attr(self) -> float:\n return self.arg*2\n @pw.output_attribute\n def ret(self) -> float:\n return self.attr + 1\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = 
simple_transformer(table=t1.select(arg=t1.age)).table\npw.debug.compute_and_print(t1 + t2, include_id=False)\n```\n::\nResult\n```\nage | ret\n7 | 15\n8 | 17\n9 | 19\n10 | 21\n```\n::\n::\npw.cast(target_type, col)\nChanges the type of the column to target_type and converts the data of this column\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n val\n1 10\n2 9\n3 8\n4 7''')\nt1.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t1, include_id=False)\n```\n::\nResult\n```\nval\n7\n8\n9\n10\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt2 = t1.select(val = pw.cast(float, t1.val))\nt2.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway 
API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nval\n7.0\n8.0\n9.0\n10.0\n```\n::\n::\npw.coalesce(*args)\nFor arguments list arg_1, arg_2, \u2026, arg_n returns first not-None value.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\ncolA colB\n | 10\n 2 |\n |\n 4 | 7''')\nt2 = t1.select(t1.colA, t1.colB, col=pw.coalesce(t1.colA, t1.colB))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncolA | colB | col\n | |\n | 10 | 10\n2 | | 2\n4 | 7 | 4\n```\n::\n::\npw.column_definition(*, primary_key=False, default_value=undefined, dtype=None, name=None, append_only=None)\nCreates column definition\n* Parameters\n * primary_key (`bool`) \u2013 should column be a part of a primary key.\n * default_value (`Optional`\\[`Any`\\]) \u2013 default value replacing blank entries. The default value of the\n column must be specified explicitly,\n otherwise there will be no default value.\n * dtype (`Optional`\\[`Any`\\]) \u2013 data type. When used in schema class,\n will be deduced from the type annotation.\n * name (`Optional`\\[`str`\\]) \u2013 name of a column. 
When used in schema class,\n will be deduced from the attribute name.\n * append_only (`Optional`\\[`bool`\\]) \u2013 whether column is append-only. if unspecified, defaults to False\n or to value specified at the schema definition level\n* Returns\n Column definition.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nclass NewSchema(pw.Schema):\n key: int = pw.column_definition(primary_key=True)\n timestamp: str = pw.column_definition(name=\"@timestamp\")\n data: str\nNewSchema\n```\n::\nResult\n```\n, '@timestamp': , 'data': }>\n```\n::\n::\npw.declare_type(target_type, col)\nUsed to change the type of a column to a particular type.\nDisclaimer: it only changes type in a schema, it does not affect values stored.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n val\n1 10\n2 9.5\n3 8\n4 7''')\nt1.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be 
called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt2 = t1.filter(t1.val == pw.cast(int, t1.val))\nt2.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt3 = t2.select(val = pw.declare_type(int, t2.val))\nt3.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\npw.if_else(if_clause, then_clause, else_clause)\nEquivalent to:\n```default\nif (if_clause):\n return (then_clause)\nelse:\n return (else_clause)\n```\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\ncolA colB\n 1 | 0\n 2 | 2\n 6 | 3''')\nt2 = t1.select(res = pw.if_else(t1.colB != 0, t1.colA // t1.colB, 0))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nres\n0\n1\n2\n```\n::\n::\npw.input_attribute(type=)\nReturns new input_attribute. 
To be used inside class transformers.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n arg = pw.input_attribute()\n @pw.output_attribute\n def ret(self) -> float:\n return self.arg + 1\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = simple_transformer(table=t1.select(arg=t1.age)).table\npw.debug.compute_and_print(t1 + t2, include_id=False)\n```\n::\nResult\n```\nage | ret\n7 | 8\n8 | 9\n9 | 10\n10 | 11\n```\n::\n::\npw.input_method(type=)\nDecorator for defining input methods in class transformers.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass first_transformer:\n class table(pw.ClassArg):\n a: float = pw.input_attribute()\n @pw.method\n def fun(self, arg) -> int:\n return self.a * arg\n@pw.transformer\nclass second_transformer:\n class table(pw.ClassArg):\n m = pw.input_method(int)\n @pw.output_attribute\n def val(self):\n return self.m(2)\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = first_transformer(table=t1.select(a=t1.age)).table\nt2.schema\n```\n::\nResult\n```\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 
'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt3 = second_transformer(table=t2.select(m=t2.fun)).table\npw.debug.compute_and_print(t1 + t3, include_id=False)\n```\n::\nResult\n```\nage | val\n7 | 14\n8 | 16\n9 | 18\n10 | 20\n```\n::\n::\npw.iterate(func, iteration_limit=None, kwargs)\nIterate function until fixed point.\nFunction has to take only named arguments, Tables, and return a dict of Tables.\nInitial arguments to function are passed through kwargs.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\ndef collatz_transformer(iterated):\n def collatz_step(x: int) -> int:\n if x == 1:\n return 1\n elif x % 2 == 0:\n return x / 2\n else:\n return 3 * x + 1\n new_iterated = iterated.select(val=pw.apply(collatz_step, iterated.val))\n return dict(iterated=new_iterated)\ntab = pw.debug.table_from_markdown('''\nval\n 1\n 2\n 3\n 4\n 5\n 6\n 7\n 8''')\nret = pw.iterate(collatz_transformer, iterated=tab).iterated\npw.debug.compute_and_print(ret, include_id=False)\n```\n::\nResult\n```\nval\n1\n1\n1\n1\n1\n1\n1\n1\n```\n::\n::\npw.make_tuple(*args)\nCreates a tuple from the provided expressions.\n* Parameters\n args (`Union`\\[`ColumnExpression`, `None`, `int`, `float`, `str`, `bytes`, `bool`, `Pointer`, `datetime`, `timedelta`, `ndarray`, `Json`, 
`dict`\\[`str`, `Any`\\], `tuple`\\[`Any`, `...`\\]\\]) \u2013 a list of expressions to be put in a tuple\n* Returns\n tuple\nNOTE: * Each cell in the output column will be a tuple containing the corresponding values from the input columns.\n* The order of values in each tuple will match the order of the input columns.\n* If any of the input columns have missing values, the resulting tuples will contain None for those positions.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\na | b | c\n1 | 10 | a\n2 | 20 |\n3 | 30 | c\n'''\n)\ntable_with_tuple = table.select(res=pw.make_tuple(pw.this.a, pw.this.b, pw.this.c))\npw.debug.compute_and_print(table_with_tuple, include_id=False)\n```\n::\nResult\n```\nres\n(1, 10, 'a')\n(2, 20, None)\n(3, 30, 'c')\n```\n::\n::\npw.method(func, kwargs)\nDecorator for creation methods in class transformers.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n a: float = pw.input_attribute()\n @pw.output_attribute\n def b(self) -> float:\n return self.fun(self.a)\n @method\n def fun(self, arg) -> float:\n return self.a * arg\nt1 = 
pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = simple_transformer(table=t1.select(a=t1.age)).table\nt2.schema\n```\n::\nResult\n```\n, 'fun': typing.Callable[..., float]}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t1 + t2.select(t2.b), include_id=False)\n```\n::\nResult\n```\nage | b\n7 | 49\n8 | 64\n9 | 81\n10 | 100\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t1 + t2.select(out = t2.fun(t2.b)), include_id=False)\n```\n::\nResult\n```\nage | out\n7 | 343\n8 | 512\n9 | 729\n10 | 1000\n```\n::\n::\npw.numba_apply(fun, numba_signature, *args, kwargs)\nApplies function to column expressions, column-wise.\nFunction has to be numba compilable.\nCurrently only a few signatures are supported:\n- function has to be unary or binary\n- arguments and return type has to be either int64 or float64\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an 
input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n val\n1 1\n2 3\n3 5\n4 7''')\nt2 = t1.select(col = pw.numba_apply(lambda x: x*x-2*x+1, \"int64(int64,)\", t1.val))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncol\n0\n4\n16\n36\n```\n::\n::\npw.output_attribute(func, kwargs)\nDecorator for creation of output_attributes.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n arg = pw.input_attribute()\n @pw.output_attribute\n def ret(self) -> float:\n return self.arg + 1\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = simple_transformer(table=t1.select(arg=t1.age)).table\npw.debug.compute_and_print(t1 + t2, include_id=False)\n```\n::\nResult\n```\nage | ret\n7 | 8\n8 | 9\n9 | 10\n10 | 11\n```\n::\n::\npw.pandas_transformer(output_schema, output_universe=None)\nDecorator that turns python function operating on pandas.DataFrame into pathway transformer.\nInput universes are converted into input DataFrame indexes.\nThe resulting index is treated as the output universe, so it must maintain uniqueness\nand be of integer type.\n* Parameters\n * output_schema (`type`\\[`Schema`\\]) \u2013 Schema of a resulting table.\n * output_universe (`UnionType`\\[`str`, `int`, `None`\\]) \u2013 Index or name of an argument whose universe will be used in resulting table. 
Defaults to None.\n* Returns\n Transformer that can be applied on Pathway tables.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\ninput = pw.debug.table_from_markdown(\n '''\n | foo | bar\n0 | 10 | 100\n1 | 20 | 200\n2 | 30 | 300\n'''\n)\nclass Output(pw.Schema):\n sum: int\n@pw.pandas_transformer(output_schema=Output)\ndef sum_cols(t: pd.DataFrame) -> pd.DataFrame:\n return pd.DataFrame(t.sum(axis=1))\noutput = sum_cols(input)\npw.debug.compute_and_print(output, include_id=False)\n```\n::\nResult\n```\nsum\n110\n220\n330\n```\n::\n::\npw.require(val, *deps)\nReturns val iff every dep in deps is not-None.\nReturns None otherwise.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\ncolA colB\n | 10\n 2 |\n |\n 4 | 7''')\nt2 = t1.select(t1.colA, t1.colB, col=pw.require(t1.colA + t1.colB, t1.colA, t1.colB))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncolA | colB | col\n | |\n | 10 |\n2 | |\n4 | 7 | 11\n```\n::\n::\npw.run(debug=False, monitoring_level=MonitoringLevel.AUTO, with_http_server=False, default_logging=True, persistence_config=None)\nRuns the computation graph.\n* Parameters\n * debug (`bool`) \u2013 
enable output out of table.debug() operators\n * monitoring_level (`MonitoringLevel`) \u2013 the verbosity of stats monitoring mechanism. One of\n pathway.MonitoringLevel.NONE, pathway.MonitoringLevel.IN_OUT,\n pathway.MonitoringLevel.ALL. If unset, pathway will choose between\n NONE and IN_OUT based on output interactivity.\n * with_http_server (`bool`) \u2013 whether to start a http server with runtime metrics. Learn\n more in a tutorial .\n * default_logging (`bool`) \u2013 whether to allow pathway to set its own logging handler. Set\n it to False if you want to set your own logging handler.\n * persistence_config (`Optional`\\[`Config`\\]) \u2013 the config for persisting the state in case this\n persistence is required.\npw.schema_builder(columns, *, name=None, properties=SchemaProperties(append_only=None))\nAllows to build schema inline, from a dictionary of column definitions.\n* Parameters\n * columns (`dict`\\[`str`, `ColumnDefinition`\\]) \u2013 dictionary of column definitions.\n * name (`Optional`\\[`str`\\]) \u2013 schema name.\n * properties (`SchemaProperties`) \u2013 schema properties.\n* Returns\n Schema\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\npw.schema_builder(columns={\n 'key': pw.column_definition(dtype=int, primary_key=True),\n 'data': pw.column_definition(dtype=int, default_value=0)\n}, name=\"my_schema\")\n```\n::\nResult\n```\n, 'data': }>\n```\n::\n::\npw.schema_from_csv(path, *, name=None, properties=SchemaProperties(append_only=None), delimiter=',', quote='\"', comment_character=None, escape=None, double_quote_escapes=True, 
num_parsed_rows=None)\nAllows to generate schema based on a CSV file.\nThe names of the columns are taken from the header of the CSV file.\nTypes of columns are inferred from the values, by checking if they can be parsed.\nCurrently supported types are str, int and float.\n* Parameters\n * path (`str`) \u2013 path to the CSV file.\n * name (`Optional`\\[`str`\\]) \u2013 schema name.\n * properties (`SchemaProperties`) \u2013 schema properties.\n * delimiter (`str`) \u2013 delimiter used in CSV file. Defaults to \u201c,\u201d.\n * quote (`str`) \u2013 quote character used in CSV file. Defaults to \u2018\u201d\u2019.\n * comment_character (`Optional`\\[`str`\\]) \u2013 character used in CSV file to denote comments.\n Defaults to None\n * escape (`Optional`\\[`str`\\]) \u2013 escape character used in CSV file. Defaults to None.\n * double_quote_escapes (`bool`) \u2013 enable escapes of double quotes. Defaults to True.\n * num_parsed_rows (`Optional`\\[`int`\\]) \u2013 number of rows, which will be parsed when inferring types. When\n set to None, all rows will be parsed. When set to 0, types of all columns\n will be set to str. Defaults to None.\n* Returns\n Schema\npw.schema_from_dict(columns, *, name=None, properties=SchemaProperties(append_only=None))\nAllows to build schema inline, from a dictionary of column definitions.\nCompared to pw.schema_builder, this one uses simpler structure of the dictionary,\nwhich allows it to be loaded from JSON file.\n* Parameters\n * columns (`dict`) \u2013 dictionary of column definitions. 
The keys in this dictionary are names\n of the columns, and the values are either:\n - type of the column\n - dictionary with keys: \u201cdtype\u201d, \u201cprimary_key\u201d, \u201cdefault_value\u201d and values,\n respectively, type of the column, whether it is a primary key, and column\u2019s\n default value.\n The type can be given both by python class, or string with class name - that\n is both int and \u201cint\u201d are accepted.\n * name (`Optional`\\[`str`\\]) \u2013 schema name.\n * properties (`dict` | `SchemaProperties`) \u2013 schema properties, given either as instance of SchemaProperties class\n or a dict specifying arguments of SchemaProperties class.\n* Returns\n Schema\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\npw.schema_from_dict(columns={\n 'key': {\"dtype\": \"int\", \"primary_key\": True},\n 'data': {\"dtype\": \"int\", \"default_value\": 0}\n}, name=\"my_schema\")\n```\n::\nResult\n```\n, 'data': }>\n```\n::\n::\npw.schema_from_types(_name=None, kwargs)\nConstructs schema from kwargs: field=type.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\ns = pw.schema_from_types(foo=int, bar=str)\ns\n```\n::\nResult\n```\n, 'bar': }>\n```\n::\n::\n"} 
-{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nissubclass(s, pw.Schema)\n```\n::\nResult\n```\nTrue\n```\n::\n::\npw.sql(query, kwargs)\nRun a SQL query on Pathway tables.\n* Parameters\n * query (`str`) \u2013 the SQL query to execute.\n * kwargs (`Table`) \u2013 the association name: table used for the execution of the SQL query. Each name:table pair links a Pathway table to a table name used in the SQL query.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n \"\"\"\n A | B\n 1 | 2\n 4 | 3\n 4 | 7\n \"\"\"\n)\nret = pw.sql(\"SELECT * FROM tab WHERE A\npw.table_transformer(func=None, *, allow_superset=True, ignore_primary_keys=True, locals=None)\nDecorator for marking that a function performs operations on Tables. As a consequence,\narguments and return value, which are annotated to have type pw.Table\\[S\\]\nwill be checked whether they indeed have schema S.\n* Parameters\n * allow_superset (`Union`\\[`bool`, `Mapping`\\[`str`, `bool`\\]\\]) \u2013 if True, the columns of the table can be a superset of columns\n in schema. 
Can be given either as a bool, and this value is then used for\n all tables, or for each argument separately, by providing a dict whose keys\n are names of arguments, and values are bools specifying value of allow_superset\n for this argument. In the latter case to provide value for return value, provide\n value for key \u201creturn\u201d. The default value is True.\n * ignore_primary_keys (`Union`\\[`bool`, `Mapping`\\[`str`, `bool`\\]\\]) \u2013 if True, the assert won\u2019t check whether table and schema\n have the same primary keys. Can be given either as a bool, and this value is then used for\n all tables, or for each argument separately, by providing a dict whose keys\n are names of arguments, and values are bools specifying value of ignore_primary_keys\n for this argument. The default value is True.\n * locals (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 when Schema class, which is used as a parameter to pw.Table is defined locally,\n you need to pass locals() as locals argument.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nA | B\n1 | 6\n3 | 8\n5 | 2\n''')\nschema = pw.schema_from_types(A=int, B=int)\nresult_schema = pw.schema_from_types(A=int, B=int, C=int)\n@pw.table_transformer\ndef sum_columns(t: pw.Table[schema]) -> pw.Table[result_schema]:\n result = t.with_columns(C=pw.this.A + pw.this.B)\n return result\npw.debug.compute_and_print(sum_columns(t1), include_id=False)\n```\n::\nResult\n```\nA | B | C\n1 | 6 | 7\n3 | 8 | 11\n5 | 2 | 7\n```\n::\n::\npw.transformer(cls)\nDecorator that wraps the outer class when 
defining class transformers.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n arg = pw.input_attribute()\n @pw.output_attribute\n def ret(self) -> float:\n return self.arg + 1\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = simple_transformer(table=t1.select(arg=t1.age)).table\npw.debug.compute_and_print(t1 + t2, include_id=False)\n```\n::\nResult\n```\nage | ret\n7 | 8\n8 | 9\n9 | 10\n10 | 11\n```\n::\n::\npw.udf(fun)\nCreate a Python UDF (universal data function) out of a callable.\nThe output type of the UDF is determined based on its type annotation.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.udf\ndef concat(left: str, right: str) -> str:\n return left+right\nt1 = pw.debug.table_from_markdown('''\nage owner pet\n 10 Alice dog\n 9 Bob dog\n 8 Alice cat\n 7 Bob dog''')\nt2 = t1.select(col = concat(t1.owner, t1.pet))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncol\nAlicecat\nAlicedog\nBobdog\nBobdog\n```\n::\n::\npw.udf_async(fun=None, *, capacity=None, retry_strategy=None, cache_strategy=None)\nCreate a Python asynchronous UDF (universal data 
function) out of a callable.\nOutput column type deduced from type-annotations of a function.\nCan be applied to a regular or asynchronous function.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport asyncio\n@pw.udf_async\nasync def concat(left: str, right: str) -> str:\n await asyncio.sleep(0.1)\n return left+right\nt1 = pw.debug.table_from_markdown('''\nage owner pet\n 10 Alice dog\n 9 Bob dog\n 8 Alice cat\n 7 Bob dog''')\nt2 = t1.select(col = concat(t1.owner, t1.pet))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncol\nAlicecat\nAlicedog\nBobdog\nBobdog\n```\n::\n::\npw.unwrap(col)\nChanges the type of the column from Optional\\[T\\] to T. 
If there is any None in the\ncolumn this operation will raise an exception.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\ncolA | colB\n1 | 5\n2 | 9\n3 | None\n4 | 15''')\nt1.schema\n```\n::\nResult\n```\n, 'colB': int | None}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t1, include_id=False)\n```\n::\nResult\n```\ncolA | colB\n1 | 5\n2 | 9\n3 |\n4 | 15\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt2 = t1.filter(t1.colA < 3)\nt2.schema\n```\n::\nResult\n```\n, 'colB': int | None}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async 
transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be accessed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncolA | colB\n1 | 5\n2 | 9\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be accessed via `result`.\nExample:\n```python\nt3 = t2.select(colB = pw.unwrap(t2.colB))\nt3.schema\n```\n::\nResult\n```\n<pathway.Schema types={'colB': <class 'int'>}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be accessed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\ncolB\n5\n9\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.io.http package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.http package\nclass pw.io.http.RetryPolicy(first_delay_ms, backoff_factor, jitter_ms)\nClass representing policy of delays or backoffs for the retries.\nFunctions\npw.io.http.read(url, *, schema=None, method='GET', payload=None, headers=None, response_mapper=None, format='json', delimiter=None, n_retries=0, retry_policy=, connect_timeout_ms=None, request_timeout_ms=None, allow_redirects=True, retry_codes=(429, 500, 502, 503, 504), autocommit_duration_ms=10000, debug_data=None, value_columns=None, primary_key=None, types=None, default_values=None)\nReads a table from an HTTP 
stream.\n* Parameters\n * url (`str`) \u2013 the full URL of the streaming endpoint to fetch data from.\n * schema (`Optional`\\[`type`\\[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * method (`str`) \u2013 request method for streaming. It should be one of\n HTTP request methods.\n * payload (`Optional`\\[`Any`\\]) \u2013 data to be sent in the body of the request.\n * headers (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 request headers in the form of dict. Wildcards are allowed both in\n keys and in values.\n * response_mapper (`Optional`\\[`Callable`\\[\\[`str` | `bytes`\\], `bytes`\\]\\]) \u2013 in case a response needs to be processed, this method can be\n provided. It will be applied to each slice of a stream.\n * format (`str`) \u2013 format of the data, \u201cjson\u201d or \u201craw\u201d. In case of a \u201craw\u201d format,\n table with single \u201cdata\u201d column will be produced. For \u201cjson\u201d format, bytes\n encoded json is expected.\n * delimiter (`UnionType`\\[`str`, `bytes`, `None`\\]) \u2013 delimiter used to split stream into messages.\n * n_retries (`int`) \u2013 how many times to retry the failed request.\n * retry_policy (`RetryPolicy`) \u2013 policy of delays or backoffs for the retries.\n * connect_timeout_ms (`Optional`\\[`int`\\]) \u2013 connection timeout, specified in milliseconds. In case\n it\u2019s None, no restrictions on connection duration will be applied.\n * request_timeout_ms (`Optional`\\[`int`\\]) \u2013 request timeout, specified in milliseconds. In case\n it\u2019s None, no restrictions on request duration will be applied.\n * allow_redirects (`bool`) \u2013 whether to allow redirects.\n * retry_codes (`Optional`\\[`tuple`\\]) \u2013 HTTP status codes that trigger retries.\n * content_type \u2013 content type of the data to send. In case the chosen format is\n JSON, it will be defaulted to \u201capplication/json\u201d.\n * autocommit_duration_ms (`int`) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * debug_data \u2013 static data replacing original one when debug mode is active.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 columns to extract for a table. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 in case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated as uuid4. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 dictionary containing the mapping between the columns and the data types\n (`pw.Type`) of the values of those columns. This parameter is optional, and\n if not provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\nExamples:\nRaw format:\n```python\nimport os\nimport pathway as pw\ntable = pw.io.http.read(\n \"https://localhost:8000/stream\",\n method=\"GET\",\n headers={\"Authorization\": f\"Bearer {os.environ['BEARER_TOKEN']}\"},\n format=\"raw\",\n)\n```\nJSON with response mapper:\nInput can be adjusted using a mapping function that will be applied to each\nslice of a stream. 
The mapping function should return bytes.\n```python\ndef mapper(msg: bytes) -> bytes:\n result = json.loads(msg.decode())\n return json.dumps({\"key\": result[\"id\"], \"text\": result[\"data\"]}).encode()\nclass InputSchema(pw.Schema):\n key: int\n text: str\nt = pw.io.http.read(\n \"https://localhost:8000/stream\",\n method=\"GET\",\n headers={\"Authorization\": f\"Bearer {os.environ['BEARER_TOKEN']}\"},\n schema=InputSchema,\n response_mapper=mapper\n)\n```\npw.io.http.rest_connector(host, port, *, route='/', schema=None, autocommit_duration_ms=1500, keep_queries=None, delete_completed_queries=None)\nRuns a lightweight HTTP server and inputs a collection from the HTTP endpoint,\nconfigured by the parameters of this method.\nOn the output, the method provides a table and a callable, which needs to accept\nthe result table of the computation, which entries will be tracked and put into\nrespective request\u2019s responses.\n* Parameters\n * host (`str`) \u2013 TCP/IP host or a sequence of hosts for the created endpoint;\n * port (`int`) \u2013 port for the created endpoint;\n * route (`str`) \u2013 route which will be listened to by the web server;\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 schema of the resulting table;\n * autocommit_duration_ms \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph;\n * keep_queries (`Optional`\\[`bool`\\]) \u2013 whether to keep queries after processing; defaults to False. 
\\[deprecated\\]\n * delete_completed_queries (`Optional`\\[`bool`\\]) \u2013 whether to send a deletion entry after the query is processed.\n Allows to remove it from the system if it is stored by operators such as `join` or `groupby`;\n* Returns\n *table* \u2013 the table read;\n response_writer: a callable, where the result table should be provided.\npw.io.http.write(table, url, *, method='POST', format='json', request_payload_template=None, n_retries=0, retry_policy=, connect_timeout_ms=None, request_timeout_ms=None, content_type=None, headers=None, allow_redirects=True, retry_codes=(429, 500, 502, 503, 504))\nSends the stream of updates from the table to the specified HTTP API.\n* Parameters\n * table (`Table`) \u2013 table to be tracked.\n * method (`str`) \u2013 request method for streaming. It should be one of\n HTTP request methods.\n * url (`str`) \u2013 the full URL of the endpoint to push data into. Can contain wildcards.\n * format (`str`) \u2013 the payload format, one of {\u201cjson\u201d, \u201ccustom\u201d}. If \u201cjson\u201d is\n specified, the plain JSON will be formed and sent. Otherwise, the contents of the\n field request_payload_template will be used.\n * request_payload_template (`Optional`\\[`str`\\]) \u2013 the template to format and send in case \u201ccustom\u201d was\n specified in the format field. Can include wildcards.\n * n_retries (`int`) \u2013 how many times to retry the failed request.\n * retry_policy (`RetryPolicy`) \u2013 policy of delays or backoffs for the retries.\n * connect_timeout_ms (`Optional`\\[`int`\\]) \u2013 connection timeout, specified in milliseconds. In case\n it\u2019s None, no restrictions on connection duration will be applied.\n * request_timeout_ms (`Optional`\\[`int`\\]) \u2013 request timeout, specified in milliseconds. 
In case it\u2019s\n None, no restrictions on request duration will be applied.\n * allow_redirects (`bool`) \u2013 Whether to allow redirects.\n * retry_codes (`Optional`\\[`tuple`\\]) \u2013 HTTP status codes that trigger retries.\n * content_type (`Optional`\\[`str`\\]) \u2013 content type of the data to send. In case the chosen format is\n JSON, it will be defaulted to \u201capplication/json\u201d.\n * headers (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 request headers in the form of dict. Wildcards are allowed both, in\n keys and in values.\nWildcards:\nWildcards are the proposed way to customize the HTTP requests composed. The\nengine will replace all entries of `{table.}` with a value from the\ncolumn `` in the row sent. This wildcard resolving will happen in url,\nrequest payload template and headers.\nExamples:\nFor the sake of demonstration, let\u2019s try different ways to send the stream of changes\non a table `pets`, containing data about pets and their owners. The table contains\njust two columns: the pet and the owner\u2019s name.\n```python\nimport pathway as pw\npets = pw.debug.table_from_markdown(\"owner pet \\n Alice dog \\n Bob cat \\n Alice cat\")\n```\nConsider that there is a need to send the stream of changes on such table to the\nexternal API endpoint (let\u2019s pick some exemplary URL for the sake of demonstration).\nTo keep things simple, we can suppose that this API accepts flat JSON objects, which\nare sent in POST requests. Then, the communication can be done with a simple code\nsnippet:\n```python\npw.io.http.write(pets, \"http://www.example.com/api/event\")\n```\nNow let\u2019s do something more custom. Suppose that the API endpoint requires us to\ncommunicate via PUT method and to pass the values as CGI-parameters. 
In this case,\nwildcards are the way to go:\n```python\npw.io.http.write(\n pets,\n \"http://www.example.com/api/event?owner={table.owner}&pet={table.pet}\",\n method=\"PUT\"\n)\n```\nA custom payload can also be formed from the outside. What if the endpoint requires\nthe data in tskv format in request body?\nFirst of all, let\u2019s form a template for the message body:\n```python\nmessage_template_tokens = [\n \"owner={table.owner}\",\n \"pet={table.pet}\",\n \"time={table.time}\",\n \"diff={table.diff}\",\n]\nmessage_template = \"\\t\".join(message_template_tokens)\n```\nNow, we can use this template and the custom format, this way:\n```python\npw.io.http.write(\n pets,\n \"http://www.example.com/api/event\",\n method=\"POST\",\n format=\"custom\",\n request_payload_template=message_template\n)\n```\n"} -{"doc": "pathway.xpacks.spatial.h3 module\npw.xpacks.spatial.h3.h3_cover_geojson(geojson, h3_level)\nCovers geojson with H3 cells at the given level.\nBuilt-in h3.polyfill is not enough as it outputs H3 cells for which their centroids fall into geojson.\n"} -{"doc": "pathway.xpacks.spatial.geofencing module\nclass pw.xpacks.spatial.geofencing.GeofenceIndex(data, geojson_geometry, resolution_meters, instance=None)\nH3-based geospatial index allowing for efficient point location inside geofences.\nGeofences are mapped to the corresponding cells id at a fixed hierarchy level.\nSee https://h3geo.org/docs/highlights/indexing/ for the description of H3 index structure.\nParameters:\ndata (pw.Table): The table containing the data to be indexed.\ngeometry (pw.ColumnExpression): The column expression representing geofences as geojsons.\nresolution_meters (float): approximately determines how large covering H3 cells should be\ninstance (pw.ColumnExpression or None): The column expression representing the instance of the index\n> allowing for creating multiple indexes at once.\nCaveats:\nGeofences crossing antimeridian are not yet 
supported.\njoin_enclosing_geofences(query_table, *, lat, lon, instance=None)\nEfficiently joins (via left_join) rows of query table with rows of indexed geofences\nfor which the query point is inside a target geofence.\nParameters:\nquery_table (pw.Table): The table containing the queries.\nlat (pw.ColumnExpression): The column expression representing latitudes (degrees) in the query_table.\nlon (pw.ColumnExpression): The column expression representing longitudes (degrees) in the query_table.\ninstance (pw.ColumnExpression or None): The column expression representing the instance of the index\n> allowing for parallel queries to multiple indexes at once.\n* Returns\n *pw.JoinResult* \u2013 result of a join between query_table and indexed data table\nExample:\nCode\n```python\nimport pathway as pw\nqueries = pw.debug.table_from_markdown('''\n | lon | lat | sample_data\n1 | 11.0 | 1.0 | foo\n2 | 11.0 | 21.0 | bar\n3 | 20.0 | 1.0 | baz\n''')\n@pw.udf\ndef json_parse(col: str) -> pw.Json:\n return pw.Json.parse(col)\ndata = pw.debug.table_from_markdown('''\n | other_data | geometry\n111 | AAA | {\"coordinates\":[[[10.0,0.0],[12.0,0.0],[12.0,2.0],[10.0,2.0]]],\"type\":\"Polygon\"}\n222 | BBB | {\"coordinates\":[[[10.0,20.0],[12.0,20.0],[12.0,22.0],[10.0,22.0]]],\"type\":\"Polygon\"}\n''').with_columns(geometry=json_parse(pw.this.geometry))\nindex = pw.xpacks.spatial.geofencing.GeofenceIndex(\n data, data.geometry, resolution_meters=100_000,\n)\nres = index.join_enclosing_geofences(\n queries,\n lat=queries.lat,\n lon=queries.lon,\n).select(\n queries.sample_data,\n pw.right.other_data,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nsample_data | other_data\nbar | BBB\nbaz |\nfoo | AAA\n```\n::\n::\nFunctions\npw.xpacks.spatial.geofencing.is_in_geofence(lat, lon, geojson_geometry)\nTest if point is inside a geojson polygon\n"} -{"doc": "pathway.xpacks.spatial.index module\nclass pw.xpacks.spatial.index.H3Index(data, lat, lon, radius_meters, 
instance=None)\nH3-based geospatial index allowing for finding nearby lat lon points.\nLat lon points are mapped to the corresponding cell id at a fixed hierarchy level.\nThey are also mapped to the neighboring cells for fast closeby points retrieval.\nSee https://h3geo.org/docs/highlights/indexing/ for the description of H3 index structure.\nParameters:\ndata (pw.Table): The table containing the data to be indexed.\nlat (pw.ColumnExpression): The column expression representing latitudes (degrees) in the data.\nlon (pw.ColumnExpression): The column expression representing longitudes (degrees) in the data.\nradius_meters (float): maximum distance supported\ninstance (pw.ColumnExpression or None): The column expression representing the instance of the index\n> allowing for creating multiple indexes at once.\njoin_on_distance(query_table, query_lat, query_lon, distance_meters=None, instance=None)\nThis method efficiently joins (via left_join) rows of query table with rows of indexed data\nsuch that two points are within a certain distance.\nParameters:\nquery_table (pw.Table): The table containing the queries.\nlat (pw.ColumnExpression): The column expression representing latitudes (degrees) in the query_table.\nlon (pw.ColumnExpression): The column expression representing longitudes (degrees) in the query_table.\ninstance (pw.ColumnExpression or None): The column expression representing the instance of the index\n> allowing for parallel queries to multiple indexes at once.\n* Returns\n *pw.JoinResult* \u2013 result of a (distance-limited) join between query_table and indexed data table\nExample:\nCode\n```python\nimport pathway as pw\nqueries = pw.debug.table_from_markdown('''\n | instance | lat | lon | sample_data\n1 | 1 | 51.1000 | 17.0300 | foo\n2 | 1 | 51.1010 | 17.0310 | bar\n3 | 2 | 40.0000 | 179.999 | baz\n4 | 2 | 10.0000 | 10.0000 | zzz\n''')\ndata = pw.debug.table_from_markdown('''\n | instance | lat | lon | other_data\n111 | 1 | 51.0990 | 17.0290 | AAA\n112 
| 1 | 51.1000 | 17.0300 | BBB\n113 | 1 | 51.1010 | 17.0310 | CCC\n114 | 1 | 51.1020 | 17.0320 | DDD\n311 | 2 | 40.0000 | 179.999 | EEE\n313 | 2 | 40.0000 | -179.999 | FFF\n314 | 2 | 40.0000 | -179.980 | GGG\n412 | 2 | 51.1000 | 17.0300 | HHH\n''')\nindex = pw.xpacks.spatial.index.H3Index(\n data, data.lat, data.lon, instance=data.instance, radius_meters=200,\n)\nres = index.join_on_distance(\n queries,\n queries.lat,\n queries.lon,\n instance=queries.instance,\n).select(\n instance=queries.instance,\n sample_data=queries.sample_data,\n other_data=pw.right.other_data,\n dist_meters=pw.left.dist_meters.num.fill_na(-1).num.round(1),\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\ninstance | sample_data | other_data | dist_meters\n1 | bar | BBB | 131.5\n1 | bar | CCC | 0.0\n1 | bar | DDD | 131.5\n1 | foo | AAA | 131.5\n1 | foo | BBB | 0.0\n1 | foo | CCC | 131.5\n2 | baz | EEE | 0.0\n2 | baz | FFF | 170.8\n2 | zzz | | -1.0\n```\n::\n::\n"} -{"doc": "---\ntitle: Other API\nsidebar: 'API'\nnavigation: true\n---\n# Other API\nThe Other API section provides a complementary collection of resources covering various aspects of our Pathway Standard Library. This section is helpful for developers and data analysts seeking to extend their knowledge and proficiency with our diverse API offerings. 
In addition to Temporal Functions, it provides in-depth information about Machine Learning Models and some column functions.\n# Contents:\n* Temporal Functions\n* ML Classifiers\n"} -{"doc": "---\ntitle: pathway.stdlib.stateful package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.stateful package\nFunctions\npw.stateful.deduplicate(table, *, col, instance=None, acceptor)\nDeduplicates rows in table on col column using acceptor function.\nIt keeps rows which were accepted by the acceptor function.\nAcceptor operates on two arguments - current value and the previous accepted value.\n* Parameters\n * table (*pw.Table\\[TSchema\\]*) \u2013 table to deduplicate\n * col (*pw.ColumnReference*) \u2013 column used for deduplication\n * acceptor (*Callable\\[\\[TDedupe, TDedupe\\], bool\\]*) \u2013 callback telling whether two values are different\n * instance (*pw.ColumnExpression, optional*) \u2013 Group column for which deduplication will be performed separately.\n Defaults to None.\n* Returns\n *pw.Table\\[TSchema\\]*\n"} -{"doc": "pathway.stdlib.stateful.deduplicate module\npw.stateful.deduplicate.deduplicate(table, *, col, instance=None, acceptor)\nDeduplicates rows in table on col column using acceptor function.\nIt keeps rows which were accepted by the acceptor function.\nAcceptor operates on two arguments - current value and the previous accepted value.\n* Parameters\n * table (*pw.Table\\[TSchema\\]*) \u2013 table to deduplicate\n * col (*pw.ColumnReference*) \u2013 column used for deduplication\n * acceptor (*Callable\\[\\[TDedupe, TDedupe\\], bool\\]*) \u2013 callback telling whether two values are different\n * instance (*pw.ColumnExpression, optional*) \u2013 Group column for which deduplication will be performed separately.\n Defaults to None.\n* Returns\n *pw.Table\\[TSchema\\]*\n"} -{"doc": "Subpackages\n* pathway.stdlib.graphs package\n * `Edge`\n * `Graph`\n * `Vertex`\n * `WeightedGraph`\n * Subpackages\n * 
pathway.stdlib.graphs.bellman_ford package\n * `DistFromSource`\n * `Vertex`\n * Submodules\n * pathway.stdlib.graphs.bellman_ford.impl module\n * pathway.stdlib.graphs.louvain_communities package\n * Submodules\n * pathway.stdlib.graphs.louvain_communities.impl module\n * pathway.stdlib.graphs.pagerank package\n * `Result`\n * Submodules\n * pathway.stdlib.graphs.pagerank.impl module\n * Submodules\n * pathway.stdlib.graphs.common module\n * `Cluster`\n * `Clustering`\n * `Edge`\n * `Vertex`\n * `Weight`\n * pathway.stdlib.graphs.graph module\n * `Graph`\n * `WeightedGraph`\n* pathway.stdlib.indexing package\n * `SortedIndex`\n * `SortedIndex.clear()`\n * `SortedIndex.copy()`\n * `SortedIndex.fromkeys()`\n * `SortedIndex.get()`\n * `SortedIndex.items()`\n * `SortedIndex.keys()`\n * `SortedIndex.pop()`\n * `SortedIndex.popitem()`\n * `SortedIndex.setdefault()`\n * `SortedIndex.update()`\n * `SortedIndex.values()`\n * `retrieve_prev_next_values()`\n * Submodules\n * pathway.stdlib.indexing.sorting module\n * `Aggregate`\n * `BinsearchOracle`\n * `Candidate`\n * `ComparisonRet`\n * `Hash`\n * `Instance`\n * `Key`\n * `LeftRight`\n * `Node`\n * `Parent`\n * `PrefixSumOracle`\n * `PrevNext`\n * `SortedIndex`\n * `SortedIndex.clear()`\n * `SortedIndex.copy()`\n * `SortedIndex.fromkeys()`\n * `SortedIndex.get()`\n * `SortedIndex.items()`\n * `SortedIndex.keys()`\n * `SortedIndex.pop()`\n * `SortedIndex.popitem()`\n * `SortedIndex.setdefault()`\n * `SortedIndex.update()`\n * `SortedIndex.values()`\n * `Value`\n * `retrieve_prev_next_values()`\n* pathway.stdlib.ml package\n * Subpackages\n * pathway.stdlib.ml.classifiers package\n * `knn_lsh_classifier_train()`\n * `knn_lsh_classify()`\n * `knn_lsh_euclidean_classifier_train()`\n * `knn_lsh_generic_classifier_train()`\n * `knn_lsh_train()`\n * Submodules\n * pathway.stdlib.ml.classifiers.test_lsh module\n * pathway.stdlib.ml.datasets package\n * Subpackages\n * pathway.stdlib.ml.smart_table_ops package\n * `Edge`\n * 
`Feature`\n * `FuzzyJoinFeatureGeneration`\n * `FuzzyJoinNormalization`\n * `JoinResult`\n * `Node`\n * Submodules\n * pathway.stdlib.ml.index module\n * `KNNIndex`\n * `KNNIndex.get_nearest_items()`\n * `KNNIndex.get_nearest_items_asof_now()`\n * pathway.stdlib.ml.utils module\n* pathway.stdlib.ordered package\n * `diff()`\n * Submodules\n * pathway.stdlib.ordered.diff module\n * `diff()`\n* pathway.stdlib.stateful package\n * `deduplicate()`\n * Submodules\n * pathway.stdlib.stateful.deduplicate module\n * `deduplicate()`\n* pathway.stdlib.statistical package\n * `interpolate()`\n* pathway.stdlib.temporal package\n * `AsofJoinResult`\n * `AsofNowJoinResult`\n * `AsofNowJoinResult.select()`\n * `CommonBehavior`\n * `Direction`\n * `IntervalJoinResult`\n * `IntervalJoinResult.select()`\n * `Window`\n * `WindowJoinResult`\n * `WindowJoinResult.select()`\n * `asof_join()`\n * `asof_join_left()`\n * `asof_join_outer()`\n * `asof_join_right()`\n * `asof_now_join()`\n * `asof_now_join_inner()`\n * `asof_now_join_left()`\n * `common_behavior()`\n * `interval()`\n * `interval_join()`\n * `interval_join_inner()`\n * `interval_join_left()`\n * `interval_join_outer()`\n * `interval_join_right()`\n * `intervals_over()`\n * `session()`\n * `sliding()`\n * `tumbling()`\n * `window_join()`\n * `window_join_inner()`\n * `window_join_left()`\n * `window_join_outer()`\n * `window_join_right()`\n * `windowby()`\n * Submodules\n * pathway.stdlib.temporal.temporal_behavior module\n * `Behavior`\n * `CommonBehavior`\n * `ExactlyOnceBehavior`\n * `common_behavior()`\n * `exactly_once_behavior()`\n * pathway.stdlib.temporal.utils module\n * `check_joint_types()`\n* pathway.stdlib.utils package\n * Submodules\n * pathway.stdlib.utils.async_transformer module\n * `AsyncTransformer`\n * `AsyncTransformer.close()`\n * `AsyncTransformer.invoke()`\n * `AsyncTransformer.open()`\n * `AsyncTransformer.result`\n * `AsyncTransformer.with_options()`\n * pathway.stdlib.utils.bucketing module\n * 
pathway.stdlib.utils.col module\n * `apply_all_rows()`\n * `flatten_column()`\n * `groupby_reduce_majority()`\n * `multiapply_all_rows()`\n * `unpack_col()`\n * pathway.stdlib.utils.filtering module\n * pathway.stdlib.utils.pandas_transformer module\n * `pandas_transformer()`\n"} -{"doc": "---\ntitle: pathway.debug package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.debug package\nFunctions\npw.debug.compute_and_print(table, *, include_id=True, short_pointers=True, n_rows=None)\nA function running the computations and printing the table.\n:type table: `Table`\n:param table: a table to be computed and printed\n:type include_id: \n:param include_id: whether to show ids of rows\n:type short_pointers: \n:param short_pointers: whether to shorten printed ids\n:type n_rows: `Optional`\\[`int`\\]\n:param n_rows: number of rows to print, if None whole table will be printed\npw.debug.compute_and_print_update_stream(table, *, include_id=True, short_pointers=True, n_rows=None)\nA function running the computations and printing the update stream of the table.\n:type table: `Table`\n:param table: a table for which the update stream is to be computed and printed\n:type include_id: \n:param include_id: whether to show ids of rows\n:type short_pointers: \n:param short_pointers: whether to shorten printed ids\n:type n_rows: `Optional`\\[`int`\\]\n:param n_rows: number of rows to print, if None whole update stream will be printed\npw.debug.table_from_markdown(table_def, id_from=None, unsafe_trusted_ids=False, schema=None)\nA function for creating a table from its definition in markdown. If it contains a special\ncolumn `__time__`, rows will be split into batches with timestamps from the column.\nA special column `__diff__` can be used to set an event type - with `1` treated\nas inserting the row and `-1` as removing it.\npw.debug.table_from_pandas(df, id_from=None, unsafe_trusted_ids=False, schema=None)\nA function for creating a table from a pandas DataFrame. 
If it contains a special\ncolumn `__time__`, rows will be split into batches with timestamps from the column.\nA special column `__diff__` can be used to set an event type - with `1` treated\nas inserting the row and `-1` as removing it.\npw.debug.table_from_parquet(path, id_from=None, unsafe_trusted_ids=False)\nReads a Parquet file into a pandas DataFrame and then converts that into a Pathway table.\npw.debug.table_to_parquet(table, filename)\nConverts a Pathway Table into a pandas DataFrame and then writes it to Parquet\n"} -{"doc": "---\ntitle: pathway.io.elasticsearch package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.elasticsearch package\nFunctions\npw.io.elasticsearch.write(table, host, auth, index_name)\nWrite a table to a given index in ElasticSearch.\n* Parameters\n * table (`Table`) \u2013 the table to output.\n * host (`str`) \u2013 the host and port, on which Elasticsearch server works.\n * auth (`ElasticSearchAuth`) \u2013 credentials for Elasticsearch authorization.\n * index_name (`str`) \u2013 name of the index, which gets the docs.\n* Returns\n None\nExample:\nConsider there is an instance of Elasticsearch, running locally on a port 9200.\nThere we have an index \u201canimals\u201d, containing an information about pets and their\nowners.\nFor the sake of simplicity we will also consider that the cluster has a simple\nusername-password authentication having both username and password equal to \u201cadmin\u201d.\nNow suppose we want to send a Pathway table pets to this local instance of\nElasticsearch.\n```python\nimport pathway as pw\npets = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | cat\n8 | Alice | cat\n''')\n```\nIt can be done as follows:\n```python\npw.io.elasticsearch.write(\n table=pets,\n host=\"http://localhost:9200\",\n auth=pw.io.elasticsearch.ElasticSearchAuth.basic(\"admin\", \"admin\"),\n index_name=\"animals\",\n)\n```\nAll the updates of table \u201cpets\u201d will be indexed to 
\u201canimals\u201d as well.\n"} -{"doc": "Read and write\n* pathway.io.csv package\n * `read()`\n * `write()`\n* pathway.io.fs package\n * `read()`\n * `write()`\n* pathway.io.http package\n * `RetryPolicy`\n * `read()`\n * `rest_connector()`\n * `write()`\n* pathway.io.jsonlines package\n * `read()`\n * `write()`\n* pathway.io.kafka package\n * `read()`\n * `read_from_upstash()`\n * `simple_read()`\n * `write()`\n* pathway.io.redpanda package\n * `read()`\n * `write()`\n"} -{"doc": "Read only\n* pathway.io.debezium package\n * `read()`\n* pathway.io.plaintext package\n * `read()`\n* pathway.io.python package\n * `ConnectorSubject`\n * `ConnectorSubject.close()`\n * `ConnectorSubject.commit()`\n * `ConnectorSubject.next_bytes()`\n * `ConnectorSubject.next_json()`\n * `ConnectorSubject.next_str()`\n * `ConnectorSubject.on_stop()`\n * `ConnectorSubject.start()`\n * `read()`\n* pathway.io.s3 package\n * `AwsS3Settings`\n * `AwsS3Settings.new_from_path()`\n * `DigitalOceanS3Settings`\n * `WasabiS3Settings`\n * `read()`\n * `read_from_digital_ocean()`\n * `read_from_wasabi()`\n* pathway.io.minio package\n * `MinIOSettings`\n * `read()`\n* pathway.io.gdrive package\n * `read()`\n* pathway.io.sqlite package\n * `read()`\n"} -{"doc": "Write only\n* pathway.io.elasticsearch package\n * `write()`\n* pathway.io.logstash package\n * `write()`\n* pathway.io.null package\n * `write()`\n* pathway.io.postgres package\n * `write()`\n * `write_snapshot()`\nclass pw.io.CsvParserSettings(delimiter=',', quote='\"', escape=None, enable_double_quote_escapes=True, enable_quoting=True, comment_character=None)\nClass representing settings for the CSV parser.\n* Parameters\n * delimiter \u2013 Field delimiter to use when parsing CSV.\n * quote \u2013 Quote character to use when parsing CSV.\n * escape \u2013 What character to use for escaping fields in CSV.\n * enable_double_quote_escapes \u2013 Enable escapes of double quotes.\n * enable_quoting \u2013 Enable quoting for the fields.\n * 
comment_character \u2013 If specified, the lines starting with the comment character will be treated as comments and therefore, will be ignored by parser\nclass pw.io.OnChangeCallback(*args, kwargs)\nThe callback to be called on every change in the table. It is required to be\ncallable and to accept four parameters: the key, the row changed, the time of the\nchange in milliseconds and the flag stating if the change had been an addition\nof the row.\nclass pw.io.OnFinishCallback(*args, kwargs)\nThe callback function to be called when the stream of changes ends. It will be called on each engine worker separately.\nFunctions\npw.io.subscribe(table, on_change, on_end=>)\nCalls a callback function on_change on every change happening in table.\n* Parameters\n * table \u2013 the table to subscribe.\n * on_change (`OnChangeCallback`) \u2013 the callback to be called on every change in the table. The\n function is required to accept three parameters: the row changed, the time\n of the change in microseconds and the flag stating if the change had been an\n addition of the row. 
These parameters of the callback are expected to have\n names row, time and is_addition respectively.\n * on_end (`OnFinishCallback`) \u2013 the callback to be called when the stream of changes ends.\n It will be called on each engine worker separately.\n* Returns\n None\nExample:\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\n | pet | owner | age | __time__ | __diff__\n 1 | dog | Alice | 10 | 0 | 1\n 2 | cat | Alice | 8 | 1 | 1\n 3 | dog | Bob | 7 | 2 | 1\n 2 | cat | Alice | 8 | 3 | -1\n''')\ndef on_change(key: pw.Pointer, row: dict, time: int, is_addition: bool):\n print(f\"{row}, {time}, {is_addition}\")\ndef on_end():\n print(\"End of stream.\")\npw.io.subscribe(table, on_change, on_end)\npw.run(monitoring_level=pw.MonitoringLevel.NONE)\n```\n::\nResult\n```\n{'pet': 'dog', 'owner': 'Alice', 'age': 10}, 0, True\n{'pet': 'cat', 'owner': 'Alice', 'age': 8}, 2, True\n{'pet': 'dog', 'owner': 'Bob', 'age': 7}, 4, True\n{'pet': 'cat', 'owner': 'Alice', 'age': 8}, 6, False\nEnd of stream.\n```\n::\n::\n"} -{"doc": "Configuration classes\nclass pw.persistence.Backend(engine_data_storage, fs_path=None)\nThe settings of a backend, which is used to persist the computation state. There\nare two kinds of data backends: metadata backend and snapshot backend. 
Both are\nconfigurable via this class.\nclassmethod filesystem(path)\nConfigure the filesystem backend.\n* Parameters\n path (`str` | `PathLike`\\[`str`\\]) \u2013 the path to the root directory in the file system, which will be used to store the persisted data.\n* Returns\n Class instance denoting the filesystem storage backend with root directory at `path`.\nclassmethod s3(root_path, bucket_settings)\nConfigure the S3 backend.\n* Parameters\n * root_path (`str`) \u2013 path to the root in the S3 storage, which will be used to store persisted data;\n * bucket_settings (`AwsS3Settings`) \u2013 the settings for S3 bucket connection in the same format as they are used by S3 connectors.\n* Returns\n Class instance denoting the S3 storage backend with root directory as\n `root_path` and connection settings given by `bucket_settings`.\nclass pw.persistence.Config(*, snapshot_interval_ms=0, metadata_storage, snapshot_storage, snapshot_access, replay_mode, continue_after_replay)\nConfigure the data persistence. An instance of this class should be passed as a\nparameter to pw.run in case persistence is enabled.\nPlease note that if you\u2019d like to use the same backend for both metadata and\nsnapshot storages, you can use the convenience method `simple_config`.\n* Parameters\n * metadata_storage (`Backend`) \u2013 metadata backend configuration;\n * snapshot_storage (`Backend`) \u2013 snapshots backend configuration;\n * snapshot_interval_ms (`int`) \u2013 the desired duration between snapshot updates in milliseconds;\nclassmethod simple_config(backend, snapshot_interval_ms=0, snapshot_access=, replay_mode=, continue_after_replay=True)\nConstruct config from a single instance of the `Backend` class, using this backend to persist metadata and snapshot.\n* Parameters\n * backend (`Backend`) \u2013 storage backend settings;\n * snapshot_interval_ms \u2013 the desired freshness of the persisted snapshot in milliseconds. 
The greater the value is, the more the amount of time that the snapshot may fall behind, and the less computational resources are required.\n* Returns\n Persistence config.\n"} -{"doc": "---\ntitle: SQL API\ndescription: 'Using SQL commands with Pathway using pw.sql function.'\nnotebook_export_path: documentation/sql_api.ipynb\n---\n\u00a0\u00a0\n# Using SQL with Pathway\nPerform SQL commands using Pathway's `pw.sql` function.\n---\nPathway provides a very simple way to use SQL commands directly in your Pathway application: the use of `pw.sql`.\nPathway is significantly different from a usual SQL database, and not all SQL operations are available in Pathway.\nIn the following, we present the SQL operations which are compatible with Pathway and how to use `pw.sql`.\nThis article is a summary of dos and don'ts on how to use Pathway to execute SQL queries, this is not an introduction to SQL.\n"} -{"doc": "Usage\nYou can very easily execute a SQL command by doing the following:\n```python\npw.sql(query, tab=t)\n```\nThis will execute the SQL command `query` where the Pathway table `t` (Python local variable) can be referred to as `tab` (SQL table name) inside `query`.\nMore generally, you can pass an arbitrary number of tables associations `name, table` using `kwargs`: `pw.sql(query, tab1=t1, tab2=t2,.., tabn=tn)`.\n"} -{"doc": "Example\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n \"\"\"\n | a | b\n 1 | 1 | 2\n 2 | 4 | 3\n 3 | 4 | 7\n \"\"\"\n)\nret = pw.sql(\"SELECT * FROM tab WHERE a2\", tab=t)\npw.debug.compute_and_print(result_where)\n```\n [2023-10-19T14:44:28]:INFO:Preparing Pathway computation\n | a | b\n ^Z3QWT29... | 4 | 3\n ^3CZ78B4... 
| 4 | 7\n"} -{"doc": "`GROUP BY`\nYou can use `GROUP BY` to group rows with the same value for a given column, and to use an aggregate function over the grouped rows.\n```python\nresult_groupby = pw.sql(\"SELECT a, SUM(b) FROM tab GROUP BY a\", tab=t)\npw.debug.compute_and_print(result_groupby)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | _col_1\n ^YYY4HAB... | 1 | 2\n ^3HN31E1... | 4 | 10\n\u26a0\ufe0f `GROUP BY` and `JOIN` should not be used together in a single `SELECT`.\n#### Aggregation functions\nWith `GROUP BY`, you can use the following aggregation functions:\n- `AVG`\n- `COUNT`\n- `MAX`\n- `MIN`\n- `SUM`\n\u26a0\ufe0f Pathway reducers (`pw.count`, `pw.sum`, etc.) aggregate over `None` values, while traditional SQL aggregate functions skip `NULL` values: be careful to remove all the undefined values before using an aggregate function.\n"} -{"doc": "`AS` (alias)\nPathway supports both notations: `old_name as new_name` and `old_name new_name`.\n```python\nresult_alias = pw.sql(\"SELECT b, a AS c FROM tab\", tab=t)\npw.debug.compute_and_print(result_alias)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | b | c\n ^YYY4HAB... | 2 | 1\n ^Z3QWT29... | 3 | 4\n ^3CZ78B4... | 7 | 4\n```python\nresult_alias = pw.sql(\"SELECT b, a c FROM tab\", tab=t)\npw.debug.compute_and_print(result_alias)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | b | c\n ^YYY4HAB... | 2 | 1\n ^Z3QWT29... | 3 | 4\n ^3CZ78B4... | 7 | 4\n"} -{"doc": "`UNION`\nPathway provides the standard `UNION` SQL operator.\nNote that `UNION` requires matching column names.\n```python\nt_union = pw.debug.table_from_markdown(\n \"\"\"\n | a | b\n 4 | 9 | 3\n 5 | 2 | 7\n \"\"\"\n)\nresult_union = pw.sql(\"SELECT * FROM tab UNION SELECT * FROM tab2\", tab=t, tab2=t_union)\npw.debug.compute_and_print(result_union)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b\n ^KYCVNKF... | 1 | 2\n ^856GZ16... | 2 | 7\n ^H3J0A0V... 
| 4 | 3\n ^GX1QVN0... | 4 | 7\n ^7HC68KR... | 9 | 3\n"} -{"doc": "`INTERSECT`\nPathway provides the standard `INTERSECT` SQL operator.\nNote that `INTERSECT` requires matching column names.\n```python\nt_inter = pw.debug.table_from_markdown(\n \"\"\"\n | a | b\n 4 | 9 | 3\n 5 | 2 | 7\n 6 | 1 | 2\n \"\"\"\n)\nresult_inter = pw.sql(\n \"SELECT * FROM tab INTERSECT SELECT * FROM tab2\", tab=t, tab2=t_inter\n)\npw.debug.compute_and_print(result_inter)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b\n ^KYCVNKF... | 1 | 2\n\u26a0\ufe0f `INTERSECT` does not support `INTERSECT ALL` (coming soon).\n"} -{"doc": "`JOIN`\nPathway provides different join operations: `INNER JOIN`, `LEFT JOIN` (or `LEFT OUTER JOIN`), `RIGHT JOIN` (or `RIGHT OUTER JOIN`), `SELF JOIN`, and `CROSS JOIN`.\n```python\nt_join = pw.debug.table_from_markdown(\n \"\"\"\n | b | c\n 4 | 4 | 9\n 5 | 3 | 4\n 6 | 7 | 5\n \"\"\"\n)\nresult_join = pw.sql(\n \"SELECT * FROM left_table INNER JOIN right_table ON left_table.b==right_table.b\",\n left_table=t,\n right_table=t_join,\n)\npw.debug.compute_and_print(result_join)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b | c\n ^J1AVR2S... | 4 | 3 | 4\n ^8V184A9... | 4 | 7 | 5\n\u26a0\ufe0f `GROUP BY` and `JOIN` should not be used together in a single `SELECT`.\n\u26a0\ufe0f `NATURAL JOIN` and `FULL JOIN` are not supported (coming soon).\n"} -{"doc": "`WITH`\nIn addition to being placed inside a `WHERE` clause, subqueries can also be performed using the `WITH` keyword:\n```python\nresult_with = pw.sql(\n \"WITH group_table (a, sumB) AS (SELECT a, SUM(b) FROM tab GROUP BY a) SELECT sumB FROM group_table\",\n tab=t,\n)\npw.debug.compute_and_print(result_with)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | sumB\n ^YYY4HAB... | 2\n ^3HN31E1... 
| 10\n"} -{"doc": "Boolean and Arithmetic Expressions\nWith the `SELECT ...` and `WHERE ...` clauses, you can use the following operators:\n- boolean operators: `AND`, `OR`, `NOT`\n- arithmetic operators: `+`, `-`, `*`, `/`, `DIV`, `MOD`, `==`, `!=`, `<`, `>`, `<=`, `>=`, `<>`\n- NULL\n```python\nresult_bool = pw.sql(\"SELECT a,b FROM tab WHERE b-a>0 AND a>3\", tab=t)\npw.debug.compute_and_print(result_bool)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b\n ^3CZ78B4... | 4 | 7\nBoth `!=` and `<>` can be used to check non-equality.\n```python\nresult_neq = pw.sql(\"SELECT a,b FROM tab WHERE a != 4 OR b <> 3\", tab=t)\npw.debug.compute_and_print(result_neq)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b\n ^YYY4HAB... | 1 | 2\n ^3CZ78B4... | 4 | 7\n`NULL` can be used to filter out rows with missing values:\n```python\nt_null = pw.debug.table_from_markdown(\n \"\"\"\n | a | b\n 1 | 1 | 2\n 2 | 4 |\n 3 | 4 | 7\n \"\"\"\n)\nresult_null = pw.sql(\"SELECT a, b FROM tab WHERE b IS NOT NULL \", tab=t_null)\npw.debug.compute_and_print(result_null)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b\n ^YYY4HAB... | 1 | 2\n ^3CZ78B4... | 4 | 7\nYou can use single row result subqueries in the `WHERE` clause to filter a table based on the subquery results:\n```python\nt_subqueries = pw.debug.table_from_markdown(\n \"\"\"\n | employee | salary\n 1 | 1 | 10\n 2 | 2 | 11\n 3 | 3 | 12\n \"\"\"\n)\nresult_subqueries = pw.sql(\n \"SELECT employee, salary FROM t WHERE salary >= (SELECT AVG(salary) FROM t)\",\n t=t_subqueries,\n)\npw.debug.compute_and_print(result_subqueries)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | employee | salary\n ^Z3QWT29... | 2 | 11\n ^3CZ78B4... 
| 3 | 12\n\u26a0\ufe0f For now, only single row result subqueries are supported.\nCorrelated subqueries and the associated operations `ANY`, `NONE`, and `EVERY` (or its alias `ALL`) are currently not supported.\n"} -{"doc": "Subpackages\n* pathway.xpacks.spatial package\n * Submodules\n * pathway.xpacks.spatial.geofencing module\n * `GeofenceIndex`\n * `GeofenceIndex.join_enclosing_geofences()`\n * `is_in_geofence()`\n * pathway.xpacks.spatial.h3 module\n * `h3_cover_geojson()`\n * pathway.xpacks.spatial.index module\n * `H3Index`\n * `H3Index.join_on_distance()`\n"} -{"doc": "---\ntitle: pathway.io.s3_csv package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.s3_csv package\nFunctions\npw.io.s3_csv.read(path, *, aws_s3_settings=None, schema=None, csv_settings=None, mode='streaming', autocommit_duration_ms=1500, persistent_id=None, debug_data=None, value_columns=None, id_columns=None, types=None, default_values=None, kwargs)\nReads a table from one or several objects in Amazon S3 bucket.\nIn case the prefix of S3 path is specified, and there are several objects lying\nunder this prefix, their order is determined according to their modification times:\nthe smaller the modification time is, the earlier the file will be passed to the\nengine.\n* Parameters\n * path (`str`) \u2013 Path to an object or to a folder of objects in Amazon S3 bucket.\n * aws_s3_settings (`Optional`\\[`AwsS3Settings`\\]) \u2013 Connection parameters for the S3 account and the bucket.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 The settings for the CSV parser.\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new input\n files in the bucket, which fall under the path prefix. Set it to \u201cstatic\u201d, it will only\n consider the available data and ingest all of it in one commit. 
Default value is\n \u201cstreaming\u201d.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Names of the columns to be extracted from the files. \\[will be deprecated soon\\]\n * id_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nExample:\nLet\u2019s consider an object store, which is hosted in Amazon S3. 
The store contains\ndatasets in the respective bucket and is located in the region eu-west-3. The goal\nis to read the dataset, located under the path `animals/` in this bucket.\nThen, the code may look as follows:\n```python\nimport os\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.s3_csv.read(\n \"animals/\",\n aws_s3_settings=pw.io.s3_csv.AwsS3Settings(\n bucket_name=\"datasets\",\n region=\"eu-west-3\",\n access_key=os.environ[\"S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"S3_SECRET_ACCESS_KEY\"],\n ),\n schema=InputSchema,\n)\n```\nAlternatively, there might be a need to read the data from S3 storage, which is\nhosted in a different cloud and, therefore, requires to specify a custom endpoint.\nIt can be done with the usage of an extra parameter endpoint of AwsS3Settings\nobject. An example for the OVH-hosted bucket would then look as follows:\n```python\nimport os\nimport pathway as pw\nt = pw.io.s3_csv.read(\n \"animals/\",\n aws_s3_settings=pw.io.s3_csv.AwsS3Settings(\n bucket_name=\"datasets\",\n region=\"rbx\",\n endpoint=\"s3.rbx.io.cloud.ovh.net\",\n access_key=os.environ[\"OVH_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"OVH_S3_SECRET_ACCESS_KEY\"],\n ),\n schema=InputSchema,\n)\n```\nIn case you are dealing with custom S3 buckets, there are two ways\nto work with paths in requests. The default and the one used by AWS S3 is a\nvirtually hosted-style. However, some installations of S3 in, for example, min.io\ndo require to use of path-style requests. 
If this is the case, you can use the\nparameter with_path_style of AwsS3Settings.\nThen, the code may look as follows:\n```python\nimport os\nimport pathway as pw\nt = pw.io.s3_csv.read(\n \"animals/\",\n aws_s3_settings=pw.io.s3_csv.AwsS3Settings(\n bucket_name=\"datasets\",\n endpoint=\"avv749.stackhero-network.com\",\n access_key=os.environ[\"MINIO_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"MINIO_S3_SECRET_ACCESS_KEY\"],\n with_path_style=True,\n ),\n schema=InputSchema,\n)\n```\n"} diff --git a/examples/pipelines/contextful/docker-compose.yml b/examples/pipelines/contextful/docker-compose.yml deleted file mode 100644 index 20a3924..0000000 --- a/examples/pipelines/contextful/docker-compose.yml +++ /dev/null @@ -1,21 +0,0 @@ -version: "3.8" -services: - pathway: - build: - context: . - ports: - - "8080:8080" - environment: - OPENAI_API_KEY: - PATHWAY_PERSISTENT_STORAGE: - volumes: - - "./data:/app/data" - streamlit_ui: - depends_on: - - pathway - build: - context: ./ui - ports: - - "8501:8501" - environment: - PATHWAY_REST_CONNECTOR_HOST: "pathway" diff --git a/examples/pipelines/contextful/ui/Dockerfile b/examples/pipelines/contextful/ui/Dockerfile deleted file mode 100644 index 78e2121..0000000 --- a/examples/pipelines/contextful/ui/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM python:3.11 - -WORKDIR /app - -RUN pip install streamlit python-dotenv - -COPY . . 
- -EXPOSE 8501 - -CMD ["streamlit", "run", "server.py", "--server.port", "8501", "--server.address", "0.0.0.0"] diff --git a/examples/pipelines/contextful/ui/server.py b/examples/pipelines/contextful/ui/server.py deleted file mode 100644 index a8160fb..0000000 --- a/examples/pipelines/contextful/ui/server.py +++ /dev/null @@ -1,52 +0,0 @@ -import os - -import requests -import streamlit as st -from dotenv import load_dotenv - -with st.sidebar: - st.markdown( - "[View the source code on GitHub](https://github.com/pathwaycom/llm-app)" - ) - -# Load environment variables -load_dotenv() -api_host = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "127.0.0.1") -api_port = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", 8080)) - - -# Streamlit UI elements -st.title("LLM App") - - -# Initialize chat history -if "messages" not in st.session_state: - st.session_state.messages = [] - -# Display chat messages from history on app rerun -for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - - -# React to user input -if prompt := st.chat_input("How can I help you today?"): - # Display user message in chat message container - with st.chat_message("user"): - st.markdown(prompt) - - # Add user message to chat history - st.session_state.messages.append({"role": "user", "content": prompt}) - - url = f"http://{api_host}:{api_port}/" - data = {"query": prompt, "user": "user"} - - response = requests.post(url, json=data) - - if response.status_code == 200: - response = response.json() - with st.chat_message("assistant"): - st.markdown(response) - st.session_state.messages.append({"role": "assistant", "content": response}) - else: - st.error(f"Failed to send data. 
Status code: {response.status_code}") diff --git a/examples/pipelines/contextful_geometric/Dockerfile b/examples/pipelines/contextful_geometric/Dockerfile deleted file mode 100644 index aed6703..0000000 --- a/examples/pipelines/contextful_geometric/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM pathwaycom/pathway:latest -WORKDIR /app -COPY . . -EXPOSE 8080 - -CMD ["python", "app.py"] diff --git a/examples/pipelines/contextful_geometric/README.md b/examples/pipelines/contextful_geometric/README.md deleted file mode 100644 index bf9364f..0000000 --- a/examples/pipelines/contextful_geometric/README.md +++ /dev/null @@ -1,83 +0,0 @@ -

- - GCP Logo Deploy with GCP - | - - Render Logo Deploy with Render - -

- -# RAG pipeline with up-to-date knowledge: get answers based on increasing number of documents - -This example implements a pipeline that answers questions based on documents in a given folder. To get the answer it sends increasingly more documents to the LLM chat until it can find an answer. You can read more about the reasoning behind this approach [here](https://pathway.com/developers/templates/adaptive-rag). - -Each query text is first turned into a vector using OpenAI embedding service, -then relevant documentation pages are found using a Nearest Neighbor index computed -for documents in the corpus. A prompt is built from the relevant documentations pages -and sent to the OpenAI chat service for processing. - -To optimize use of tokens per query, this pipeline asks a question with a small number -of documents embedded in the prompt. If OpenAI chat fails to answer based on these documents, -the number of documents is increased by `factor` given as an argument, and continues to -do so until either question is answered or a limit of iterations is reached. - -## How to run the project - -### Setup environment: -Set your env variables in the .env file placed in this directory. - -```bash -OPENAI_API_KEY=sk-... -PATHWAY_DATA_DIR= # If unset, defaults to ./data/. If running with Docker, when you change this variable you may need to change the volume mount. -PATHWAY_PERSISTENT_STORAGE= # Set this variable if you want to use caching -``` - -### Run with Docker - -To run jointly the Alert pipeline and a simple UI execute: - -```bash -docker compose up --build -``` - -Then, the UI will run at http://0.0.0.0:8501 by default. You can access it by following this URL in your web browser. - -The `docker-compose.yml` file declares a [volume bind mount](https://docs.docker.com/reference/cli/docker/container/run/#volume) that makes changes to files under `data/` made on your host computer visible inside the docker container. 
The files in `data/live` are indexed by the pipeline - you can paste new files there and they will impact the computations. - -### Run manually - -Alternatively, you can run each service separately. - -Make sure you have installed poetry dependencies with `--extras unstructured`. -```bash -poetry install --with examples --extras unstructured -``` - -Then run: -```bash -poetry run python app.py -``` - -If all dependencies are managed manually rather than using poetry, you can alternatively use: -```bash -python app.py -``` - -To run the Streamlit UI, run: -```bash -streamlit run ui/server.py --server.port 8501 --server.address 0.0.0.0 -``` - -### Querying the pipeline - -To query the pipeline, you can call the REST API: - -```bash -curl --data '{ - "user": "user", - "query": "How to connect to Kafka in Pathway?" -}' http://localhost:8080/ | jq -``` - -or access the Streamlit UI at `0.0.0.0:8501`. - diff --git a/examples/pipelines/contextful_geometric/__init__.py b/examples/pipelines/contextful_geometric/__init__.py deleted file mode 100644 index 0565668..0000000 --- a/examples/pipelines/contextful_geometric/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .app import run - -__all__ = ["run"] diff --git a/examples/pipelines/contextful_geometric/app.py b/examples/pipelines/contextful_geometric/app.py deleted file mode 100644 index 69546e3..0000000 --- a/examples/pipelines/contextful_geometric/app.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Microservice for a context-aware ChatGPT assistant. - -The following program reads in a collection of documents, -embeds each document using the OpenAI document embedding model, -then builds an index for fast retrieval of documents relevant to a question, -effectively replacing a vector database. - -The program then starts a REST API endpoint serving queries about programming in Pathway. 
- -Each query text is first turned into a vector using OpenAI embedding service, -then relevant documentation pages are found using a Nearest Neighbor index computed -for documents in the corpus. A prompt is built from the relevant documentations pages -and sent to the OpenAI chat service for processing. - -To optimize use of tokens per query, this pipeline asks a question with a small number -of documents embedded in the prompt. If OpenAI chat fails to answer based on these documents, -the number of documents is increased by `factor` given as an argument, and continues to -do so until either question is answered or a limit of iterations is reached. - -Please check the README.md in this directory for how-to-run instructions. -""" - -import os - -import dotenv -import pathway as pw -from pathway.stdlib.indexing import default_vector_document_index -from pathway.xpacks.llm.embedders import OpenAIEmbedder -from pathway.xpacks.llm.llms import OpenAIChat -from pathway.xpacks.llm.question_answering import ( - answer_with_geometric_rag_strategy_from_index, -) - -# To use advanced features with Pathway Scale, get your free license key from -# https://pathway.com/features and paste it below. -# To use Pathway Community, comment out the line below. 
-pw.set_license_key("demo-license-key-with-telemetry") - -dotenv.load_dotenv() - - -class DocumentInputSchema(pw.Schema): - doc: str - - -class QueryInputSchema(pw.Schema): - query: str - user: str - - -def run( - *, - data_dir: str = os.environ.get("PATHWAY_DATA_DIR", "./data/"), - api_key: str = os.environ.get("OPENAI_API_KEY", ""), - host: str = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "0.0.0.0"), - port: int = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", "8080")), - embedder_locator: str = "text-embedding-ada-002", - embedding_dimension: int = 1536, - model_locator: str = "gpt-3.5-turbo", - max_tokens: int = 60, - temperature: float = 0.0, - n_starting_documents: int = 2, - factor: int = 2, - max_iterations: int = 4, - **kwargs, -): - embedder = OpenAIEmbedder( - api_key=api_key, - model=embedder_locator, - retry_strategy=pw.udfs.FixedDelayRetryStrategy(), - cache_strategy=pw.udfs.DefaultCache(), - ) - - documents = pw.io.jsonlines.read( - data_dir, - schema=DocumentInputSchema, - mode="streaming", - autocommit_duration_ms=50, - ) - - index = default_vector_document_index( - data_column=documents.doc, - data_table=documents, - dimensions=embedding_dimension, - embedder=embedder, - ) - - query, response_writer = pw.io.http.rest_connector( - host=host, - port=port, - schema=QueryInputSchema, - autocommit_duration_ms=50, - delete_completed_queries=True, - ) - - model = OpenAIChat( - api_key=api_key, - model=model_locator, - temperature=temperature, - max_tokens=max_tokens, - retry_strategy=pw.udfs.FixedDelayRetryStrategy(), - cache_strategy=pw.udfs.DefaultCache(), - ) - - responses = query.select( - result=answer_with_geometric_rag_strategy_from_index( - query.query, - index, - documents.doc, - model, - n_starting_documents, - factor, - max_iterations, - ) - ) - - response_writer(responses) - - pw.run() - - -if __name__ == "__main__": - run() diff --git a/examples/pipelines/contextful_geometric/data/pathway-docs.jsonl 
b/examples/pipelines/contextful_geometric/data/pathway-docs.jsonl deleted file mode 100644 index 929737d..0000000 --- a/examples/pipelines/contextful_geometric/data/pathway-docs.jsonl +++ /dev/null @@ -1,415 +0,0 @@ -{"doc": "---\ntitle: pathway.io.sqlite package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.sqlite package\nFunctions\npw.io.sqlite.read(path, table_name, schema, *, autocommit_duration_ms=1500, debug_data=None)\nReads a table from a rowid table in SQLite database.\n* Parameters\n * path (`PathLike` | `str`) \u2013 Path to the database file.\n * table_name (`str`) \u2013 Name of the table in the database to be read.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 The maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "Notes\nThe CSV files should follow a standard CSV settings: the separator is \u2018,\u2019, the\nquotechar is \u2018\u201d\u2019, and there is no escape.\npw.demo.replay_csv_with_time(path, *, schema, time_column, unit='s', autocommit_ms=100, speedup=1)\nReplay a static CSV files as a data stream while respecting the time between updated based on a timestamp columns.\nThe timestamps in the file should be ordered positive integers.\n* Parameters\n * path (`str`) \u2013 Path to the file to stream.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * time_column (`str`) \u2013 Column containing the timestamps.\n * unit (`str`) \u2013 Unit of the timestamps. Only \u2018s\u2019, \u2018ms\u2019, \u2018us\u2019, and \u2018ns\u2019 are supported. Defaults to \u2018s\u2019.\n * autocommit_duration_ms \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * speedup (`float`) \u2013 Produce stream speedup times faster than it would result from the time column.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "---\ntitle: Demo API\nsidebar: 'API'\nnavigation: true\n---\n# Demo API\nThe demo module allows you to create custom data streams from scratch or by utilizing a CSV file.\nThis feature empowers you to effectively test and debug your Pathway implementation using realtime data.\nPathway demo module\nTypical use:\n```python\nclass InputSchema(pw.Schema):\n name: str\n age: int\npw.demo.replay_csv(\"./input_stream.csv\", schema=InputSchema)\n```\n::\nResult\n```\n, 'age': }>\n```\n::\n::\nFunctions\npw.demo.generate_custom_stream(value_generators, *, schema, nb_rows=None, autocommit_duration_ms=1000, input_rate=1.0, persistent_id=None)\nGenerates a data stream.\nThe generator creates a table and periodically streams rows.\nIf a `nb_rows` value is provided, there are `nb_rows` row generated in total,\nelse the generator streams indefinitely.\nThe rows are generated iteratively and have an associated index x, starting from 0.\nThe values of each column are generated by their associated function in `value_generators`.\n* Parameters\n * value_generators (`dict`\\[`str`, `Any`\\]) \u2013 Dictionary mapping column names to functions that generate values for each column.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * nb_rows (`Optional`\\[`int`\\]) \u2013 The number of rows to generate. Defaults to None. If set to None, the generator\n generates streams indefinitely.\n * types \u2013 Dictionary containing the mapping between the columns and the data types (`pw.Type`) of the values of those columns. 
This parameter is optional, and if not provided the default type is `pw.Type.ANY`.\n * autocommit_duration_ms (`int`) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * input_rate (`float`) \u2013 The rate at which rows are generated per second. Defaults to 1.0.\n* Returns\n *Table* \u2013 The generated table.\nExample:\n"} -{"doc": "---\ntitle: Demo API\nsidebar: 'API'\nnavigation: true\n---\n# Demo API\nThe demo module allows you to create custom data streams from scratch or by utilizing a CSV file.\nThis feature empowers you to effectively test and debug your Pathway implementation using realtime data.\nPathway demo module\nTypical use:\n```python\nvalue_functions = {\n 'number': lambda x: x + 1,\n 'name': lambda x: f'Person {x}',\n 'age': lambda x: 20 + x,\n}\nclass InputSchema(pw.Schema):\n number: int\n name: str\n age: int\npw.demo.generate_custom_stream(value_functions, schema=InputSchema, nb_rows=10)\n```\n::\nResult\n```\n, 'name': , 'age': }>\n```\n::\n::\nIn the above example, a data stream is generated with 10 rows, where each row has columns \u2018number\u2019, \u2018name\u2019, and \u2018age\u2019.\nThe \u2018number\u2019 column contains values incremented by 1 from 1 to 10, the \u2018name\u2019 column contains \u2018Person\u2019\nfollowed by the respective row index, and the \u2018age\u2019 column contains values starting from 20 incremented by\nthe row index.\npw.demo.noisy_linear_stream(nb_rows=10, input_rate=1.0)\nGenerates an artificial data stream for the linear regression tutorial.\n* Parameters\n * nb_rows (*int, optional*) \u2013 The number of rows to generate in the data stream. Defaults to 10.\n * input_rate (*float, optional*) \u2013 The rate at which rows are generated per second. 
Defaults to 1.0.\n* Returns\n *pw.Table* \u2013 A table containing the generated data stream.\nExample:\n```python\ntable = pw.demo.noisy_linear_stream(nb_rows=100, input_rate=2.0)\n```\nIn the above example, an artificial data stream is generated with 100 rows. Each row has two columns, \u2018x\u2019 and \u2018y\u2019.\nThe \u2018x\u2019 values range from 0 to 99, and the \u2018y\u2019 values are equal to \u2018x\u2019 plus some random noise.\npw.demo.range_stream(nb_rows=30, offset=0, input_rate=1.0)\nGenerates a simple artificial data stream, used to compute the sum in our examples.\n* Parameters\n * nb_rows (*int, optional*) \u2013 The number of rows to generate in the data stream. Defaults to 30.\n * offset (*int, optional*) \u2013 The offset value added to the generated \u2018value\u2019 column. Defaults to 0.\n * input_rate (*float, optional*) \u2013 The rate at which rows are generated per second. Defaults to 1.0.\n* Returns\n *pw.Table* \u2013 a table containing the generated data stream.\nExample:\n```python\ntable = pw.demo.range_stream(nb_rows=50, offset=10, input_rate=2.5)\n```\nIn the above example, an artificial data stream is generated with a single column \u2018value\u2019 and 50 rows.\nThe \u2018value\u2019 column contains values ranging from \u2018offset\u2019 (10 in this case) to \u2018nb_rows\u2019 + \u2018offset\u2019 (60).\npw.demo.replay_csv(path, *, schema, input_rate=1.0)\nReplay a static CSV files as a data stream.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to the file to stream.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * autocommit_duration_ms \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * input_rate (*float, optional*) \u2013 The rate at which rows are read per second. 
Defaults to 1.0.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "---\ntitle: pathway.stdlib.statistical package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.statistical package\nFunctions\npw.statistical.interpolate(self, timestamp, *values, mode=InterpolateMode.LINEAR)\nInterpolates missing values in a column using the previous and next values based on a timestamps column.\n* Parameters\n * timestamp (*ColumnReference*) \u2013 Reference to the column containing timestamps.\n * \\*values (*ColumnReference*) \u2013 References to the columns containing values to be interpolated.\n * mode (*InterpolateMode, optional*) \u2013 The interpolation mode. Currently, only InterpolateMode.LINEAR is supported. Default is InterpolateMode.LINEAR.\n* Returns\n *Table* \u2013 A new table with the interpolated values.\n* Raises\n ValueError \u2013 If the columns are not ColumnReference or if the interpolation mode is not supported.\nNOTE: * The interpolation is performed based on linear interpolation between the previous and next values.\n* If a value is missing at the beginning or end of the column, no interpolation is performed.\nExample:\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\ntimestamp | values_a | values_b\n1 | 1 | 10\n2 | |\n3 | 3 |\n4 | |\n5 | |\n6 | 6 | 60\n''')\ntable = table.interpolate(pw.this.timestamp, pw.this.values_a, pw.this.values_b)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ntimestamp | values_a | values_b\n1 | 1 | 10\n2 | 2.0 | 20.0\n3 | 3 | 30.0\n4 | 4.0 | 40.0\n5 | 5.0 | 50.0\n6 | 6 | 60\n```\n::\n::\n"} -{"doc": "pathway.stdlib.graphs.louvain_communities.impl module\nFunctions\npw.graphs.louvain_communities.impl.exact_modularity(G, C, round_digits=16)\nThis function computes modularity of a given weighted graph G with\nrespect to clustering C.\nThis implementation is meant to be used for testing / development,\nas computing exact value requires us to know the exact 
sum of the edge weights,\nwhich creates long dependency chains, and may be slow.\nThis implementation rounds the modularity to round_digits decimal places\n(default is 16), for result res it returns round(res, ndigits = round_digits)\n"} -{"doc": "pathway.stdlib.ml.classifiers.test_lsh module\npw.ml.classifiers.test_lsh.test_bucketer_cosine()\nVerifies that L buckets were indeed created\npw.ml.classifiers.test_lsh.test_bucketer_euclidean()\nVerifies that L buckets were indeed created\npw.ml.classifiers.test_lsh.test_lsh()\nVerifies that close points are mapped together and distant ones - apart.\npw.ml.classifiers.test_lsh.test_lsh_bucketing()\nVerifies that bucketing is properly indexed.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.classifiers package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.classifiers package\nFunctions\npw.ml.classifiers.knn_lsh_classifier_train(data, L, type='euclidean', kwargs)\nBuild the LSH index over data.\nL the number of repetitions of the LSH scheme.\nReturns a LSH projector of type (queries: Table, k:Any) -> Table\npw.ml.classifiers.knn_lsh_classify(knn_model, data_labels, queries, k)\nClassify the queries.\nUse the knn_model to extract the k closest datapoints.\nThe queries are then labeled using a majority vote between the labels\nof the retrieved datapoints, using the labels provided in data_labels.\npw.ml.classifiers.knn_lsh_euclidean_classifier_train(data, d, M, L, A)\nBuild the LSH index over data using the Euclidean distances.\nd is the dimension of the data, L the number of repetition of the LSH scheme,\nM and A are specific to LSH with Euclidean distance, M is the number of random projections\ndone to create each bucket and A is the width of each bucket on each projection.\npw.ml.classifiers.knn_lsh_generic_classifier_train(data, lsh_projection, distance_function, L)\nBuild the LSH index over data using the a generic lsh_projector and its associated distance.\nL the number of repetitions of the LSH 
scheme.\nReturns a LSH projector of type (queries: Table, k:Any) -> Table\npw.ml.classifiers.knn_lsh_train(data, L, type='euclidean', kwargs)\nBuild the LSH index over data.\nL the number of repetitions of the LSH scheme.\nReturns a LSH projector of type (queries: Table, k:Any) -> Table\n"} -{"doc": "---\ntitle: pathway.io.fs package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.fs package\nFunctions\npw.io.fs.read(path, format, *, schema=None, mode='streaming', csv_settings=None, json_field_paths=None, object_pattern='*', with_metadata=False, persistent_id=None, autocommit_duration_ms=1500, debug_data=None, value_columns=None, primary_key=None, types=None, default_values=None)\nReads a table from one or several files with the specified format.\nIn case the folder is passed to the engine, the order in which files from the\ndirectory are processed is determined according to the modification time of files\nwithin this folder: they will be processed by ascending order of the modification time.\nIn case the format is \u201cplaintext\u201d, the table will consist of a single column\n`data` with each cell containing a single line from the file.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to the file or to the folder with files.\n * format (`str`) \u2013 Format of data to be read. Currently \u201ccsv\u201d, \u201cjson\u201d, \u201cplaintext\u201d, \u201cplaintext_by_file\u201d and \u201cbinary\u201d formats are supported. The difference between \u201cplaintext\u201d and \u201cplaintext_by_file\u201d is how the input is tokenized: if the \u201cplaintext\u201d option is chosen, it\u2019s split by the newlines. Otherwise, the files are split in full and one row will correspond to one file. 
In case the \u201cbinary\u201d format is specified, the data is read as raw bytes without UTF-8 parsing.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 denotes how the engine polls the new data from the source. Currently \u201cstreaming\u201d, \u201cstatic\u201d, and \u201cstreaming_with_deletions\u201d are supported. If set to \u201cstreaming\u201d the engine will wait for the new input files in the directory. On the other hand, \u201cstreaming_with_deletions\u201d mode also tracks file deletions and modifications and reflects them in the state. For example, if a file was deleted, \u201cstreaming_with_deletions\u201dmode will also remove rows obtained by reading this file from the table. Finally, the \u201cstatic\u201d mode will only consider the available data and ingest all of it in one commit. The default value is \u201cstreaming\u201d.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser. This parameter is used only in case\n the specified format is \u201ccsv\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is \u201cjson\u201d, this field allows to map field names\n into path in the read json object. For the field which require such mapping,\n it should be given in the format `: `,\n where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * object_pattern (`str`) \u2013 Unix shell style pattern for filtering only certain files in the directory. Ignored in case a path to a single file is specified.\n * with_metadata (`bool`) \u2013 When set to true, the connector will add an additional column named `_metadata` to the table. This column will be a JSON field that will contain two optional fields - `created_at` and `modified_at`. These fields will have integral UNIX timestamps for the creation and modification time respectively. 
Additionally, the column will also have an optional field named `owner` that will contain the name of the file owner (applicable only for Un). Finally, the column will also contain a field named `path` that will show the full path to the file from where a row was filled.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data (`Any`) \u2013 Static data replacing original one when debug mode is active.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Names of the columns to be extracted from the files. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. Supported in \u201ccsv\u201d and \u201cjson\u201d formats.\n \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entriest value of the column must be specified explicitly,\n otherwise there will be no default value. 
\\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nExample:\nConsider you want to read a dataset, stored in the filesystem in a standard CSV\nformat. The dataset contains data about pets and their owners.\nFor the sake of demonstration, you can prepare a small dataset by creating a CSV file\nvia a unix command line tool:\n```bash\nprintf \"id,owner,pet\\n1,Alice,dog\\n2,Bob,dog\\n3,Alice,cat\\n4,Bob,dog\" > dataset.csv\n```\nIn order to read it into Pathway\u2019s table, you can first do the import and then\nuse the `pw.io.fs.read` method:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.fs.read(\"dataset.csv\", format=\"csv\", schema=InputSchema)\n```\nThen, you can output the table in order to check the correctness of the read:\nCode\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice dog\n Bob dog\nAlice cat\n Bob dog\n```\n::\n::\nSimilarly, we can do the same for JSON format.\nFirst, we prepare a dataset:\n```bash\nprintf \"{\\\"id\\\":1,\\\"owner\\\":\\\"Alice\\\",\\\"pet\\\":\\\"dog\\\"}\n{\\\"id\\\":2,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"dog\\\"}\n{\\\"id\\\":3,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"cat\\\"}\n{\\\"id\\\":4,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"cat\\\"}\" > dataset.jsonlines\n```\nAnd then, we use the method with the \u201cjson\u201d format:\n```python\nt = pw.io.fs.read(\"dataset.jsonlines\", format=\"json\", schema=InputSchema)\n```\nNow let\u2019s try something different. Consider you have site access logs stored in a\nseparate folder in several files. 
For the sake of simplicity, a log entry contains\nan access ID, an IP address and the login of the user.\nA dataset, corresponding to the format described above can be generated, thanks to the\nfollowing set of unix commands:\n```bash\nmkdir logs\nprintf \"id,ip,login\\n1,127.0.0.1,alice\\n2,8.8.8.8,alice\" > logs/part_1.csv\nprintf \"id,ip,login\\n3,8.8.8.8,bob\\n4,127.0.0.1,alice\" > logs/part_2.csv\n```\nNow, let\u2019s see how you can use the connector in order to read the content of this\ndirectory into a table:\n```python\nclass InputSchema(pw.Schema):\n ip: str\n login: str\nt = pw.io.fs.read(\"logs/\", format=\"csv\", schema=InputSchema)\n```\nThe only difference is that you specified the name of the directory instead of the\nfile name, as opposed to what you had done in the previous example. It\u2019s that simple!\nAlternatively, we can do the same for the \u201cjson\u201d variant:\nThe dataset creation would look as follows:\n```bash\nmkdir logs\nprintf \"{\\\"id\\\":1,\\\"ip\\\":\\\"127.0.0.1\\\",\\\"login\\\":\\\"alice\\\"}\n{\\\"id\\\":2,\\\"ip\\\":\\\"8.8.8.8\\\",\\\"login\\\":\\\"alice\\\"}\" > logs/part_1.jsonlines\nprintf \"{\\\"id\\\":3,\\\"ip\\\":\\\"8.8.8.8\\\",\\\"login\\\":\\\"bob\\\"}\n{\\\"id\\\":4,\\\"ip\\\":\\\"127.0.0.1\\\",\\\"login\\\":\\\"alice\\\"}\" > logs/part_2.jsonlines\n```\nWhile reading the data from logs folder can be expressed as:\n```python\nt = pw.io.fs.read(\"logs/\", format=\"json\", schema=InputSchema, mode=\"static\")\n```\nBut what if you are working with a real-time system, which generates logs all the time.\nThe logs are being written and after a while they get into the log directory (this is\nalso called \u201clogs rotation\u201d). Now, consider that there is a need to fetch the new files\nfrom this logs directory all the time. Would Pathway handle that? Sure!\nThe only difference would be in the usage of `mode` field. 
So the code\nsnippet will look as follows:\n```python\nt = pw.io.fs.read(\"logs/\", format=\"csv\", schema=InputSchema, mode=\"streaming\")\n```\nOr, for the \u201cjson\u201d format case:\n```python\nt = pw.io.fs.read(\"logs/\", format=\"json\", schema=InputSchema, mode=\"streaming\")\n```\nWith this method, you obtain a table updated dynamically. The changes in the logs would incur\nchanges in the Business-Intelligence \u2018BI\u2019-ready data, namely, in the tables you would like to output. To see\nhow these changes are reported by Pathway, have a look at the\n\u201cStreams of Updates and Snapshots\u201d\narticle.\nFinally, a simple example for the plaintext format would look as follows:\n```python\nt = pw.io.fs.read(\"raw_dataset/lines.txt\", format=\"plaintext\")\n```\npw.io.fs.write(table, filename, format)\nWrites `table`\u2019s stream of updates to a file in the given format.\n* Parameters\n * table (`Table`) \u2013 Table to be written.\n * filename (`str` | `PathLike`) \u2013 Path to the target output file.\n * format (`str`) \u2013 Format to use for data output. Currently, there are two supported\n formats: \u201cjson\u201d and \u201ccsv\u201d.\n* Returns\n None\nExample:\nIn this simple example you can see how table output works.\nFirst, import Pathway and create a table:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice dog \\n 2 9 Bob cat \\n 3 8 Alice cat\")\n```\nConsider you would want to output the stream of changes of this table in csv format.\nIn order to do that you simply do:\n```python\npw.io.fs.write(t, \"table.csv\", format=\"csv\")\n```\nNow, let\u2019s see what you have on the output:\n```bash\ncat table.csv\n```\n```csv\nage,owner,pet,time,diff\n10,\"Alice\",\"dog\",0,1\n9,\"Bob\",\"cat\",0,1\n8,\"Alice\",\"cat\",0,1\n```\nThe first three columns clearly represent the data columns you have. The column time\nrepresents the number of operations minibatch, in which each of the rows was read. 
In\nthis example, since the data is static: you have 0. The diff is another\nelement of this stream of updates. In this context, it is 1 because all three rows were read from\nthe input. All in all, the extra information in `time` and `diff` columns - in this case -\nshows us that in the initial minibatch (`time = 0`), you have read three rows and all of\nthem were added to the collection (`diff = 1`).\nAlternatively, this data can be written in JSON format:\n```python\npw.io.fs.write(t, \"table.jsonlines\", format=\"json\")\n```\nThen, we can also check the output file by executing the command:\n```bash\ncat table.jsonlines\n```\n```json\n{\"age\":10,\"owner\":\"Alice\",\"pet\":\"dog\",\"diff\":1,\"time\":0}\n{\"age\":9,\"owner\":\"Bob\",\"pet\":\"cat\",\"diff\":1,\"time\":0}\n{\"age\":8,\"owner\":\"Alice\",\"pet\":\"cat\",\"diff\":1,\"time\":0}\n```\nAs one can easily see, the values remain the same, while the format has changed to a plain JSON.\n"} -{"doc": "Subpackages\n* pathway.stdlib.ml.classifiers package\n * `knn_lsh_classifier_train()`\n * `knn_lsh_classify()`\n * `knn_lsh_euclidean_classifier_train()`\n * `knn_lsh_generic_classifier_train()`\n * `knn_lsh_train()`\n * Submodules\n * pathway.stdlib.ml.classifiers.test_lsh module\n * `test_bucketer_cosine()`\n * `test_bucketer_euclidean()`\n * `test_lsh()`\n * `test_lsh_bucketing()`\n* pathway.stdlib.ml.datasets package\n * Subpackages\n * pathway.stdlib.ml.datasets.classification package\n* pathway.stdlib.ml.smart_table_ops package\n * `Edge`\n * `Feature`\n * `FuzzyJoinFeatureGeneration`\n * `FuzzyJoinFeatureGeneration.as_integer_ratio()`\n * `FuzzyJoinFeatureGeneration.bit_count()`\n * `FuzzyJoinFeatureGeneration.bit_length()`\n * `FuzzyJoinFeatureGeneration.conjugate()`\n * `FuzzyJoinFeatureGeneration.denominator`\n * `FuzzyJoinFeatureGeneration.from_bytes()`\n * `FuzzyJoinFeatureGeneration.imag`\n * `FuzzyJoinFeatureGeneration.numerator`\n * `FuzzyJoinFeatureGeneration.real`\n * 
`FuzzyJoinFeatureGeneration.to_bytes()`\n * `FuzzyJoinNormalization`\n * `FuzzyJoinNormalization.as_integer_ratio()`\n * `FuzzyJoinNormalization.bit_count()`\n * `FuzzyJoinNormalization.bit_length()`\n * `FuzzyJoinNormalization.conjugate()`\n * `FuzzyJoinNormalization.denominator`\n * `FuzzyJoinNormalization.from_bytes()`\n * `FuzzyJoinNormalization.imag`\n * `FuzzyJoinNormalization.numerator`\n * `FuzzyJoinNormalization.real`\n * `FuzzyJoinNormalization.to_bytes()`\n * `JoinResult`\n * `Node`\n"} -{"doc": "pathway.stdlib.ml.index module\nclass pw.ml.index.KNNIndex(data_embedding, data, n_dimensions, n_or=20, n_and=10, bucket_length=10.0, distance_type='euclidean')\nA K-Nearest Neighbors (KNN) index implementation using the Locality-Sensitive Hashing (LSH)\nalgorithm within Pathway. This index is designed to efficiently find the\nnearest neighbors of a given query embedding within a dataset.\n* Parameters\n * data_embedding (*pw.ColumnExpression*) \u2013 The column expression representing embeddings in the data.\n * data (*pw.Table*) \u2013 The table containing the data to be indexed.\n * n_dimensions (*int*) \u2013 number of dimensions in the data\n * n_or (*int*) \u2013 number of ORs\n * n_and (*int*) \u2013 number of ANDs\n * bucket_length (*float*) \u2013 bucket length (after projecting on a line)\n * distance_type (*str*) \u2013 euclidean metric is supported.\nget_nearest_items(query_embedding, k=3, collapse_rows=True)\nThis method queries the index with given queries and returns \u2018k\u2019 most relevant documents\nfor each query in the stream. 
While using this method, documents associated with\nthe queries will be updated if new more relevant documents appear.\nIf you don\u2019t want queries results to get updated in the future, take a look at\nget_nearest_items_asof_now.\n* Parameters\n * query_embedding (`ColumnReference`) \u2013 column of embedding vectors precomputed from the query.\n * k (`int`) \u2013 The number of most relevant documents to return for each query.\n Defaults to 3.\n * collapse_rows (`bool`) \u2013 Determines the format of the output. If set to True,\n multiple rows corresponding to a single query will be collapsed into a single row,\n with each column containing a tuple of values from the original rows. If set to False,\n the output will retain the multi-row format for each query. Defaults to True.\n* Returns\n pw.Table\n* If `collapse_rows` is set to True: Returns a table where each row corresponds to a unique query.\nEach column in the row contains a tuple (or list) of values, aggregating up\nto \u2018k\u2019 matches from the dataset.\nFor example:\n```text\n | name | age\n^YYY4HAB... | () | ()\n^X1MXHYY... | ('bluejay', 'cat', 'eagle') | (43, 42, 41)\n```\n* If `collapse_rows` is set to False: Returns a table where each row represents a match from the dataset\nfor a given query. 
Multiple rows can correspond to the same query, up to \u2018k\u2019 matches.\nExample:\n```text\nname | age | embedding | query_id\n | | | ^YYY4HAB...\nbluejay | 43 | (4, 3, 2) | ^X1MXHYY...\ncat | 42 | (3, 3, 2) | ^X1MXHYY...\neagle | 41 | (2, 3, 2) | ^X1MXHYY...\n```\nExample:\nCode\n```python\nimport pathway as pw\nimport pandas as pd\ndocuments = pw.debug.table_from_pandas(\n pd.DataFrame.from_records([\n {\"document\": \"document 1\", \"embeddings\":[1,-1, 0]},\n {\"document\": \"document 2\", \"embeddings\":[1, 1, 0]},\n {\"document\": \"document 3\", \"embeddings\":[0, 0, 1]},\n ])\n)\nindex = KNNIndex(documents.embeddings, documents, n_dimensions=3)\nqueries = pw.debug.table_from_pandas(\n pd.DataFrame.from_records([\n {\"query\": \"What is doc 3 about?\", \"embeddings\":[.1, .1, .1]},\n {\"query\": \"What is doc -5 about?\", \"embeddings\":[-1, 10, -10]},\n ])\n)\nrelevant_docs = index.get_nearest_items(queries.embeddings, k=2)\npw.debug.compute_and_print(relevant_docs)\n```\n::\nResult\n```\n | document | embeddings\n^YYY4HAB... | () | ()\n^X1MXHYY... | ('document 2', 'document 3') | ((1, 1, 0), (0, 0, 1))\n```\n::\n::\nget_nearest_items_asof_now(query_embedding, k=3, collapse_rows=True)\nThis method queries the index with given queries and returns \u2018k\u2019 most relevant documents\nfor each query in the stream. The already answered queries are not updated in\nthe future if new documents appear.\n* Parameters\n * query_embedding (`ColumnReference`) \u2013 column of embedding vectors precomputed from the query.\n * k (`int`) \u2013 The number of most relevant documents to return for each query.\n Defaults to 3.\n * collapse_rows (`bool`) \u2013 Determines the format of the output. If set to True,\n multiple rows corresponding to a single query will be collapsed into a single row,\n with each column containing a tuple of values from the original rows. If set to False,\n the output will retain the multi-row format for each query. 
Defaults to True.\nFor examples, see `get_nearest_items`.\n"} -{"doc": "---\ntitle: pathway.io.null package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.null package\nFunctions\npw.io.null.write(table)\nWrites `table`\u2019s stream of updates to the empty sink.\nInside this routine, the data is formatted into the empty object, and then doesn\u2019t\nget written anywhere.\n* Parameters\n table (`Table`) \u2013 Table to be written.\n* Returns\n None\nExample:\nOne (of a very few) examples, where you can probably need this kind of functionality\nif the case when a Pathway program is benchmarked and the IO part needs to be\nsimplified as much as possible.\nIf the table is `table`, the null output can be configured in the following way:\n```python\npw.io.null.write(table) \n```\n"} -{"doc": "---\ntitle: pathway.stdlib.indexing package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.indexing package\nclass pw.indexing.SortedIndex()\nclear(None. Remove all items from D.)\ncopy(a shallow copy of D)\nfromkeys(value=None, /)\nCreate a new dictionary with keys from iterable and values set to value.\nget(key, default=None, /)\nReturn the value for key if key is in the dictionary, else default.\nitems(a set-like object providing a view on D's items)\nkeys(a set-like object providing a view on D's keys)\npop(k, v, remove specified key and return the corresponding value.)\nIf the key is not found, return the default if given; otherwise,\nraise a KeyError.\npopitem()\nRemove and return a (key, value) pair as a 2-tuple.\nPairs are returned in LIFO (last-in, first-out) order.\nRaises KeyError if the dict is empty.\nsetdefault(key, default=None, /)\nInsert key with a value of default if key is not in the dictionary.\nReturn the value for key if key is in the dictionary, else default.\nupdate(FNone. 
Update D from dict/iterable E and F.)\nIf E is present and has a .keys() method, then does: for k in E: D\\[k\\] = E\\[k\\]\nIf E is present and lacks a .keys() method, then does: for k, v in E: D\\[k\\] = v\nIn either case, this is followed by: for k in F: D\\[k\\] = F\\[k\\]\nvalues(an object providing a view on D's values)\nFunctions\npw.indexing.retrieve_prev_next_values(ordered_table, value=None)\nRetrieve, for each row, a pointer to the first row in the ordered_table that contains a non-\u201cNone\u201d value, based on the orders defined by the prev and next columns.\n* Parameters\n * ordered_table (*pw.Table*) \u2013 Table with three columns: value, prev, next.\n The prev and next columns contain pointers to other rows.\n * value (*Optional\\[pw.ColumnReference\\]*) \u2013 Column reference pointing to the column containing values.\n If not provided, assumes the column name is \u201cvalue\u201d.\n* Returns\n *pw.Table* \u2013\n Table with two columns: prev_value and next_value.\n The prev_value column contains the values of the first row, according to the order defined by the column next, with a value different from None.\n The next_value column contains the values of the first row, according to the order defined by the column prev, with a value different from None.\n"} -{"doc": "pathway.stdlib.indexing.sorting module\nclass pw.indexing.sorting.Aggregate()\nclass pw.indexing.sorting.BinsearchOracle()\nclass pw.indexing.sorting.Candidate()\nclass pw.indexing.sorting.ComparisonRet()\nclass pw.indexing.sorting.Hash()\nclass pw.indexing.sorting.Instance()\nclass pw.indexing.sorting.Key()\nclass pw.indexing.sorting.LeftRight()\nclass pw.indexing.sorting.Node()\nclass pw.indexing.sorting.Parent()\nclass pw.indexing.sorting.PrefixSumOracle()\nclass pw.indexing.sorting.PrevNext()\nclass pw.indexing.sorting.SortedIndex()\nclear(None. 
Remove all items from D.)\ncopy(a shallow copy of D)\nfromkeys(value=None, /)\nCreate a new dictionary with keys from iterable and values set to value.\nget(key, default=None, /)\nReturn the value for key if key is in the dictionary, else default.\nitems(a set-like object providing a view on D's items)\nkeys(a set-like object providing a view on D's keys)\npop(k, v, remove specified key and return the corresponding value.)\nIf the key is not found, return the default if given; otherwise,\nraise a KeyError.\npopitem()\nRemove and return a (key, value) pair as a 2-tuple.\nPairs are returned in LIFO (last-in, first-out) order.\nRaises KeyError if the dict is empty.\nsetdefault(key, default=None, /)\nInsert key with a value of default if key is not in the dictionary.\nReturn the value for key if key is in the dictionary, else default.\nupdate(FNone. Update D from dict/iterable E and F.)\nIf E is present and has a .keys() method, then does: for k in E: D\\[k\\] = E\\[k\\]\nIf E is present and lacks a .keys() method, then does: for k, v in E: D\\[k\\] = v\nIn either case, this is followed by: for k in F: D\\[k\\] = F\\[k\\]\nvalues(an object providing a view on D's values)\nclass pw.indexing.sorting.Value()\npw.indexing.sorting.retrieve_prev_next_values(ordered_table, value=None)\nRetrieve, for each row, a pointer to the first row in the ordered_table that contains a non-\u201cNone\u201d value, based on the orders defined by the prev and next columns.\n* Parameters\n * ordered_table (*pw.Table*) \u2013 Table with three columns: value, prev, next.\n The prev and next columns contain pointers to other rows.\n * value (*Optional\\[pw.ColumnReference\\]*) \u2013 Column reference pointing to the column containing values.\n If not provided, assumes the column name is \u201cvalue\u201d.\n* Returns\n *pw.Table* \u2013\n Table with two columns: prev_value and next_value.\n The prev_value column contains the values of the first row, according to the order defined by the column 
next, with a value different from None.\n The next_value column contains the values of the first row, according to the order defined by the column prev, with a value different from None.\n"} -{"doc": "Usage\nReducers are used in `reduce` to compute the aggregated results obtained by a `groupby`:\n```python\nimport pathway as pw\n```\n```python\nmy_table.groupby(table.columnA).reduce(aggregated_result=pw.reducers.my_reducer(my_table.columnB))\n```\nWe use the following table `t` in the examples:\n```python\nt = pw.debug.table_from_markdown(\n \"\"\"\n | colA | colB | colC | colD\n 1 | valA | -1 | 5 | 4\n 2 | valA | 1 | 5 | 7\n 3 | valA | 2 | 5 | -3\n 4 | valB | 4 | 10 | 2\n 5 | valB | 4 | 10 | 6\n 6 | valB | 7 | 10 | 1\n \"\"\"\n)\npw.debug.compute_and_print(t)\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | colA | colB | colC | colD\n ^YYY4HAB... | valA | -1 | 5 | 4\n ^Z3QWT29... | valA | 1 | 5 | 7\n ^3CZ78B4... | valA | 2 | 5 | -3\n ^3HN31E1... | valB | 4 | 10 | 2\n ^3S2X6B2... | valB | 4 | 10 | 6\n ^A984WV0... | valB | 7 | 10 | 1\n"} -{"doc": "`tuple`\nReturn a tuple containing all the aggregated values. Order of values inside a tuple\nis consistent across application to many columns. If optional argument skip_nones is\nset to True, any Nones in aggregated values will be omitted from the result.\n```python\nt.groupby(t.colA).reduce(tuple_colB=pw.reducers.tuple(t.colB), tuple_colD=pw.reducers.tuple(t.colD))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | tuple_colB | tuple_colD\n ^ENHSR8M... | (-1, 1, 2) | (4, 7, -3)\n ^XN617D8... | (4, 4, 7) | (2, 6, 1)\n"} -{"doc": "`sorted_tuple`\nReturn a sorted tuple containing all the aggregated values. If optional argument skip_nones is\nset to True, any Nones in aggregated values will be omitted from the result.\n```python\nt.groupby(t.colA).reduce(tuples=pw.reducers.sorted_tuple(t.colB))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | tuples\n ^ENHSR8M... 
| (-1, 1, 2)\n ^XN617D8... | (4, 4, 7)\n"} -{"doc": "`ndarray`\nReturn an array containing all the aggregated values. Order of values inside an array\nis consistent across application to many columns. If optional argument skip_nones is\nset to True, any Nones in aggregated values will be omitted from the result.\n```python\nt.groupby(t.colA).reduce(tuple_colB=pw.reducers.ndarray(t.colB), tuple_colD=pw.reducers.ndarray(t.colD))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | tuple_colB | tuple_colD\n ^XN617D8... | [4 4 7] | [2 6 1]\n ^ENHSR8M... | [-1 1 2] | [ 4 7 -3]\n"} -{"doc": "`any`\nReturns any of the aggregated values. Values are consistent across application to many columns.\n```python\nt.groupby(t.colA).reduce(any_colB=pw.reducers.any(t.colB), any_colD=pw.reducers.any(t.colD))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | any_colB | any_colD\n ^ENHSR8M... | 2 | -3\n ^XN617D8... | 7 | 1\n"} -{"doc": "`unique`\nReturns aggregated value, if all values are identical. If values are not identical, exception is raised.\n```python\nt.groupby(t.colA).reduce(unique=pw.reducers.unique(t.colC))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | unique\n ^ENHSR8M... | 5\n ^XN617D8... | 10\n```python\nimport numpy as np\n```\n```python\n# ### `sum`\n#\n# Return the sum of the values of aggregated numpy arrays.\nimport pandas as pd\n```\n```python\nnp_table = pw.debug.table_from_pandas(\n pd.DataFrame(\n {\n \"data\": [\n np.array([1, 2, 3]),\n np.array([4, 5, 6]),\n np.array([7, 8, 9]),\n ]\n }\n )\n)\n```\n```python\nnp_table.reduce(data_sum=pw.reducers.sum(np_table.data))\n```\n [2023-10-19T14:44:23]:INFO:Preparing Pathway computation\n | data_sum\n ^PWSRT42... 
| [12 15 18]\n"} -{"doc": "---\ntitle: pathway.io.minio package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.minio package\nclass pw.io.minio.MinIOSettings(endpoint, bucket_name, access_key, secret_access_key, *, with_path_style=True, region=None)\nStores MinIO bucket connection settings.\n* Parameters\n * endpoint \u2013 Endpoint for the bucket.\n * bucket_name \u2013 Name of a bucket.\n * access_key \u2013 Access key for the bucket.\n * secret_access_key \u2013 Secret access key for the bucket.\n * region \u2013 Region of the bucket.\n * with_path_style \u2013 Whether to use path-style addresses for bucket access. It defaults to True as this is the most widespread way to access MinIO, but can be overridden in case of a custom configuration.\nFunctions\npw.io.minio.read(path, minio_settings, format, *, schema=None, mode='streaming', csv_settings=None, json_field_paths=None, persistent_id=None, autocommit_duration_ms=1500, debug_data=None)\nReads a table from one or several objects from S3 bucket in MinIO.\nIn case the prefix is specified, and there are several objects lying under this\nprefix, their order is determined according to their modification times: the smaller\nthe modification time is, the earlier the file will be passed to the engine.\n* Parameters\n * path (`str`) \u2013 Path to an object or to a folder of objects in MinIO S3 bucket.\n * minio_settings (`MinIOSettings`) \u2013 Connection parameters for the MinIO account and the bucket.\n * format (`str`) \u2013 Format of data to be read. Currently \u201ccsv\u201d, \u201cjson\u201d and \u201cplaintext\u201d\n formats are supported.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new objects under the\n given path prefix. Set it to \u201cstatic\u201d, it will only consider the available\n data and ingest all of it. 
Default value is \u201cstreaming\u201d.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser. This parameter is used only in case\n the specified format is \u201ccsv\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is \u201cjson\u201d, this field allows to map field names\n into path in the read json object. For the field which require such mapping,\n it should be given in the format `: `,\n where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data (`Any`) \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\nConsider that there is a table, which is stored in CSV format in the min.io S3\nbucket. 
Then, you can use this method in order to connect and acquire its contents.\nIt may look as follows:\n```python\nimport os\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.minio.read(\n \"animals/\",\n minio_settings=pw.io.minio.MinIOSettings(\n bucket_name=\"datasets\",\n endpoint=\"avv749.stackhero-network.com\",\n access_key=os.environ[\"MINIO_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"MINIO_S3_SECRET_ACCESS_KEY\"],\n ),\n format=\"csv\",\n schema=InputSchema,\n)\n```\n"} -{"doc": "---\ntitle: pathway.io.logstash package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.logstash package\nFunctions\npw.io.logstash.write(table, endpoint, n_retries=0, retry_policy=, connect_timeout_ms=None, request_timeout_ms=None)\nSends the stream of updates from the table to HTTP input \nof Logstash. The data is sent in the format of flat JSON objects, with two extra\nfields for time and diff.\n* Parameters\n * table (`Table`) \u2013 table to be tracked;\n * endpoint (`str`) \u2013 Logstash endpoint, accepting entries;\n * n_retries (`int`) \u2013 number of retries in case of failure;\n * retry_policy (`RetryPolicy`) \u2013 policy of delays or backoffs for the retries;\n * connect_timeout_ms (`Optional`\\[`int`\\]) \u2013 connection timeout, specified in milliseconds. In case it\u2019s None, no restrictions on connection duration will be applied;\n * request_timeout_ms (`Optional`\\[`int`\\]) \u2013 request timeout, specified in milliseconds. In case it\u2019s None, no restrictions on request duration will be applied.\nExample:\nSuppose that we need to send the stream of updates to locally installed Logstash.\nFor example, you can use docker-elk \nrepository in order to get the ELK stack up and running at your local machine in a\nfew minutes.\nIf Logstash stack is installed, you need to configure the input pipeline. 
The\nsimplest possible way to do this, is to add the following lines in the input plugins\nlist:\n```text\nhttp {\n port => 8012\n}\n```\nThe port is specified for the sake of example and can be changed. Further, we will\nuse 8012 for clarity.\nNow, with the pipeline configured, you can stream the changed into Logstash as\nsimple as:\n```python\npw.io.logstash.write(table, \"http://localhost:8012\") \n```\n"} -{"doc": "---\ntitle: pathway.io.csv package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.csv package\nFunctions\npw.io.csv.read(path, value_columns=None, *, schema=None, csv_settings=None, mode='streaming', object_pattern='*', with_metadata=False, autocommit_duration_ms=1500, persistent_id=None, debug_data=None, id_columns=None, types=None, default_values=None, kwargs)\nReads a table from one or several files with delimiter-separated values.\nIn case the folder is passed to the engine, the order in which files from\nthe directory are processed is determined according to the modification time of\nfiles within this folder: they will be processed by ascending order of\nthe modification time.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to the file or to the folder with files.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Names of the columns to be extracted from the files. \\[will be deprecated soon\\]\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * id_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser.\n * mode (`str`) \u2013 denotes how the engine polls the new data from the source. 
Currently \u201cstreaming\u201d, \u201cstatic\u201d, and \u201cstreaming_with_deletions\u201d are supported. If set to \u201cstreaming\u201d the engine will wait for the new input files in the directory. On the other hand, \u201cstreaming_with_deletions\u201d mode also tracks file deletions and modifications and reflects them in the state. For example, if a file was deleted, \u201cstreaming_with_deletions\u201dmode will also remove rows obtained by reading this file from the table. Finally, the \u201cstatic\u201d mode will only consider the available data and ingest all of it in one commit. The default value is \u201cstreaming\u201d.\n * object_pattern (`str`) \u2013 Unix shell style pattern for filtering only certain files in the directory. Ignored in case a path to a single file is specified.\n * with_metadata (`bool`) \u2013 When set to true, the connector will add an additional column named `_metadata` to the table. This column will be a JSON field that will contain two optional fields - `created_at` and `modified_at`. These fields will have integral UNIX timestamps for the creation and modification time respectively. Additionally, the column will also have an optional field named `owner` that will contain the name of the file owner (applicable only for Un). Finally, the column will also contain a field named `path` that will show the full path to the file from where a row was filled.\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. 
\\[will be deprecated soon\\]\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\nConsider you want to read a dataset, stored in the filesystem in a standard CSV\nformat. The dataset contains data about pets and their owners.\nFor the sake of demonstration, you can prepare a small dataset by creating a CSV file\nvia a unix command line tool:\n```bash\nprintf \"id,owner,pet\\n1,Alice,dog\\n2,Bob,dog\\n3,Alice,cat\\n4,Bob,dog\" > dataset.csv\n```\nIn order to read it into Pathway\u2019s table, you can first do the import and then\nuse the pw.io.csv.read method:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.csv.read(\"dataset.csv\", schema=InputSchema, mode=\"static\")\n```\nThen, you can output the table in order to check the correctness of the read:\nCode\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice dog\n Bob dog\nAlice cat\n Bob dog\n```\n::\n::\nNow let\u2019s try something different. Consider you have site access logs stored in a\nseparate folder in several files. 
For the sake of simplicity, a log entry contains\nan access ID, an IP address and the login of the user.\nA dataset, corresponding to the format described above can be generated, thanks to the\nfollowing set of unix commands:\n```bash\nmkdir logs\nprintf \"id,ip,login\\n1,127.0.0.1,alice\\n2,8.8.8.8,alice\" > logs/part_1.csv\nprintf \"id,ip,login\\n3,8.8.8.8,bob\\n4,127.0.0.1,alice\" > logs/part_2.csv\n```\nNow, let\u2019s see how you can use the connector in order to read the content of this\ndirectory into a table:\n```python\nclass InputSchema(pw.Schema):\n ip: str\n login: str\nt = pw.io.csv.read(\"logs/\", schema=InputSchema, mode=\"static\")\n```\nThe only difference is that you specified the name of the directory instead of the\nfile name, as opposed to what you had done in the previous example. It\u2019s that simple!\nBut what if you are working with a real-time system, which generates logs all the time.\nThe logs are being written and after a while they get into the log directory (this is\nalso called \u201clogs rotation\u201d). Now, consider that there is a need to fetch the new files\nfrom this logs directory all the time. Would Pathway handle that? Sure!\nThe only difference would be in the usage of mode flag. So the code\nsnippet will look as follows:\n```python\nt = pw.io.csv.read(\"logs/\", schema=InputSchema, mode=\"streaming\")\n```\nWith this method, you obtain a table updated dynamically. The changes in the logs would incur\nchanges in the Business-Intelligence \u2018BI\u2019-ready data, namely, in the tables you would like to output. 
To see\nhow these changes are reported by Pathway, have a look at the\n\u201cStreams of Updates and Snapshots\u201d\narticle.\npw.io.csv.write(table, filename)\nWrites table\u2019s stream of updates to a file in delimiter-separated values format.\n* Parameters\n * table (`Table`) \u2013 Table to be written.\n * filename (`str` | `PathLike`) \u2013 Path to the target output file.\n* Returns\n None\nExample:\nIn this simple example you can see how table output works.\nFirst, import Pathway and create a table:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice dog \\n 2 9 Bob cat \\n 3 8 Alice cat\")\n```\nConsider you would want to output the stream of changes of this table. In order to do that\nyou simply do:\n```python\npw.io.csv.write(t, \"table.csv\")\n```\nNow, let\u2019s see what you have on the output:\n```bash\ncat table.csv\n```\n```csv\nage,owner,pet,time,diff\n10,\"Alice\",\"dog\",0,1\n9,\"Bob\",\"cat\",0,1\n8,\"Alice\",\"cat\",0,1\n```\nThe first three columns clearly represent the data columns you have. The column time\nrepresents the number of operations minibatch, in which each of the rows was read. In\nthis example, since the data is static: you have 0. The diff is another\nelement of this stream of updates. In this context, it is 1 because all three rows were read from\nthe input. 
All in all, the extra information in `time` and `diff` columns - in this case -\nshows us that in the initial minibatch (`time = 0`), you have read three rows and all of\nthem were added to the collection (`diff = 1`).\n"} -{"doc": "---\ntitle: pathway.io.gdrive package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.gdrive package\nFunctions\npw.io.gdrive.read(object_id, *, mode='streaming', refresh_interval=30, service_user_credentials_file)\nReads a table from a Google Drive directory or file.\nIt will return a table with single column data containing each file in a binary format.\n* Parameters\n * object_id (`str`) \u2013 id of a directory or file. Directories will be scanned recursively.\n * mode (`str`) \u2013 denotes how the engine polls the new data from the source. Currently \u201cstreaming\u201d\n and \u201cstatic\u201d are supported. If set to \u201cstreaming\u201d, it will check for updates, deletions\n and new files every refresh_interval seconds. \u201cstatic\u201d mode will only consider\n the available data and ingest all of it in one commit.\n The default value is \u201cstreaming\u201d.\n * refresh_interval (`int`) \u2013 time in seconds between scans. Applicable if mode is set to \u2018streaming\u2019.\n * service_user_credentials_file (`str`) \u2013 Google API service user json file.\n* Returns\n The table read.\nExample:\n```python\nimport pathway as pw\ntable = pw.io.gdrive.read(\n object_id=\"0BzDTMZY18pgfcGg4ZXFRTDFBX0j\",\n service_user_credentials_file=\"credentials.json\"\n)\n```\n"} -{"doc": "---\ntitle: pathway.io.plaintext package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.plaintext package\nFunctions\npw.io.plaintext.read(path, *, mode='streaming', object_pattern='*', with_metadata=False, persistent_id=None, autocommit_duration_ms=1500, debug_data=None)\nReads a table from a text file or a directory of text files. 
The resulting table\nwill consist of a single column `data`, and have the number of rows equal to the number\nof lines in the file. Each cell will contain a single line from the file.\nIn case the folder is specified, and there are several files placed in the folder,\ntheir order is determined according to their modification times: the smaller the\nmodification time is, the earlier the file will be passed to the engine.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to a file or to a folder.\n * mode (`str`) \u2013 denotes how the engine polls the new data from the source. Currently \u201cstreaming\u201d, \u201cstatic\u201d, and \u201cstreaming_with_deletions\u201d are supported. If set to \u201cstreaming\u201d the engine will wait for the new input files in the directory. On the other hand, \u201cstreaming_with_deletions\u201d mode also tracks file deletions and modifications and reflects them in the state. For example, if a file was deleted, \u201cstreaming_with_deletions\u201dmode will also remove rows obtained by reading this file from the table. Finally, the \u201cstatic\u201d mode will only consider the available data and ingest all of it in one commit. The default value is \u201cstreaming\u201d.\n * object_pattern (`str`) \u2013 Unix shell style pattern for filtering only certain files in the directory. Ignored in case a path to a single file is specified.\n * with_metadata (`bool`) \u2013 When set to true, the connector will add an additional column named `_metadata` to the table. This column will be a JSON field that will contain two optional fields - `created_at` and `modified_at`. These fields will have integral UNIX timestamps for the creation and modification time respectively. Additionally, the column will also have an optional field named `owner` that will contain the name of the file owner (applicable only for Un). 
Finally, the column will also contain a field named `path` that will show the full path to the file from where a row was filled.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will be persisted or `None`, if there is no need to persist the state of this table. When a program restarts, it restores the state for all input tables according to what was saved for their `persistent_id`. This way it\u2019s possible to configure the start of computations from the moment they were terminated last time.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\n```python\nimport pathway as pw\nt = pw.io.plaintext.read(\"raw_dataset/lines.txt\")\n```\n"} -{"doc": "---\ntitle: pathway.stdlib.graphs package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.graphs package\nclass pw.graphs.Edge()\nBasic edge class, holds pointers to the endpoint vertices.\nclass pw.graphs.Graph(V, E)\nBasic class representing undirected, unweighted (multi)graph.\nclass pw.graphs.Vertex()\nclass pw.graphs.WeightedGraph(V, E, WE)\nBasic class representing undirected, unweighted (multi)graph.\n"} -{"doc": "Subpackages\n* pathway.stdlib.graphs.bellman_ford package\n * `DistFromSource`\n * `Vertex`\n * Submodules\n * pathway.stdlib.graphs.bellman_ford.impl module\n * `Dist`\n * `DistFromSource`\n * `Vertex`\n* pathway.stdlib.graphs.louvain_communities package\n * Submodules\n * pathway.stdlib.graphs.louvain_communities.impl module\n * `exact_modularity()`\n* pathway.stdlib.graphs.pagerank package\n * `Result`\n * Submodules\n * pathway.stdlib.graphs.pagerank.impl module\n * `Result`\n"} -{"doc": 
"pathway.stdlib.graphs.common module\nclass pw.graphs.common.Cluster()\nclass pw.graphs.common.Clustering()\nClass describing cluster membership relation:\nvertex u (id-column) belongs to cluster c.\nclass pw.graphs.common.Edge()\nBasic edge class, holds pointers to the endpoint vertices.\nclass pw.graphs.common.Vertex()\nclass pw.graphs.common.Weight()\nBasic weight class. To be used as extension of Vertex / Edge\n"} -{"doc": "pathway.stdlib.graphs.graph module\nclass pw.graphs.graph.Graph(V, E)\nBasic class representing undirected, unweighted (multi)graph.\nclass pw.graphs.graph.WeightedGraph(V, E, WE)\nBasic class representing undirected, unweighted (multi)graph.\n"} -{"doc": "---\ntitle: pathway.io.debezium package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.debezium package\nFunctions\npw.io.debezium.read(rdkafka_settings, topic_name, *, db_type=, schema=None, debug_data=None, autocommit_duration_ms=1500, persistent_id=None, value_columns=None, primary_key=None, types=None, default_values=None)\nConnector, which takes a topic in the format of Debezium\nand maintains a corresponding table in Pathway, on which you can do all the\ntable operations provided. In order to do that, you will need a Debezium connector.\n* Parameters\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of\n librdkafka.\n * topic_name (`str`) \u2013 Name of topic in Kafka to which the updates are streamed.\n * db_type (`DebeziumDBType`) \u2013 Type of the database from which events are streamed;\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nExample:\nConsider there is a need to stream a database table along with its changes directly into\nthe Pathway engine. One of the standard well-known solutions for table streaming is\nDebezium:\nit supports streaming data from MySQL, Postgres, MongoDB and a few more databases directly to a\ntopic in Kafka. 
The streaming first sends a snapshot of the data and then streams\nchanges for the specific change (namely: inserted, updated or removed) rows.\nConsider there is a table in Postgres, which is\ncreated according to the following schema:\n```sql\nCREATE TABLE pets (\n id SERIAL PRIMARY KEY,\n age INTEGER,\n owner TEXT,\n pet TEXT\n);\n```\nThis table, by default, will be streamed to the topic with the same name. In order to\nread it,you need to set the settings for `rdkafka`. For the sake of demonstration,\nlet\u2019s take those from the example of the Kafka connector:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"sasl_ssl\",\n \"sasl.mechanism\": \"SCRAM-SHA-256\",\n \"group.id\": \"$GROUP_NAME\",\n \"session.timeout.ms\": \"60000\",\n \"sasl.username\": os.environ[\"KAFKA_USERNAME\"],\n \"sasl.password\": os.environ[\"KAFKA_PASSWORD\"]\n}\n```\nNow, using the settings you can set up a connector. It is as simple as:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n id: str = pw.column_definition(primary_key=True)\n age: int\n owner: str\n pet: str\nt = pw.io.debezium.read(\n rdkafka_settings,\n topic_name=\"pets\",\n schema=InputSchema\n)\n```\nAs a result, upon its start, the connector would provide the full snapshot of the\ntable `pets` into the table `t` in Pathway. The table `t` can then be operated as\nusual. Throughout the run time, the rows in the table `pets` can change. 
In this\ncase, the changes in the result will be provided in the output connectors by the\nStream of Updates mechanism.\n"} -{"doc": "---\ntitle: pathway.io.postgres package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.postgres package\nFunctions\npw.io.postgres.write(table, postgres_settings, table_name, max_batch_size=None)\nWrites `table`\u2019s stream of updates to a postgres table.\nIn order for write to be successful, it is required that the table contains `time`\nand `diff` columns of the integer type - you can refer to the article\n\u201cStreams of Updates and Snapshots\u201d\nto learn more about the reasoning behind it.\n* Parameters\n * postgres_settings (`dict`) \u2013 Components for the connection string for Postgres.\n * table_name (`str`) \u2013 Name of the target table.\n * max_batch_size (`Optional`\\[`int`\\]) \u2013 Maximum number of entries allowed to be committed within a single transaction.\n* Returns\n None\nExample:\nConsider there\u2019s a need to output a stream of updates from a table in Pathway to\na table in Postgres. Let\u2019s see how this can be done with the connector.\nFirst of all, one needs to provide the required credentials for Postgres\nconnection string.\nWhile the connection string can include a wide variety of settings, such as SSL\nor connection timeouts, in this example we will keep it simple and provide the\nsmallest example possible. 
Suppose that the database is running locally on the standard\nport 5432, that it has the name `database` and is accessible under the username\n`user` with a password `pass`.\nIt gives us the following content for the connection string:\n```python\nconnection_string_parts = {\n \"host\": \"localhost\",\n \"port\": \"5432\",\n \"dbname\": \"database\",\n \"user\": \"user\",\n \"password\": \"pass\",\n}\n```\nNow let\u2019s load a table, which we will output to the database:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice 1 \\n 2 9 Bob 1 \\n 3 8 Alice 2\")\n```\nIn order to output the table, we will need to create a new table in the database. The table\nwould need to have all the columns that the output data has. Moreover it will need\ninteger columns `time` and `diff`, because these values are an essential part of the\noutput. Finally, it is also a good idea to create the sequential primary key for\nour changes so that we know the updates\u2019 order.\nTo sum things up, the table creation boils down to the following SQL command:\n```sql\nCREATE TABLE pets (\n id SERIAL PRIMARY KEY,\n time INTEGER NOT NULL,\n diff INTEGER NOT NULL,\n age INTEGER,\n owner TEXT,\n pet TEXT\n);\n```\nNow, having done all the preparation, one can simply call:\n```python\npw.io.postgres.write(\n t,\n connection_string_parts,\n \"pets\",\n)\n```\npw.io.postgres.write_snapshot(table, postgres_settings, table_name, primary_key, max_batch_size=None)\nMaintains a snapshot of a table within a Postgres table.\nIn order for write to be successful, it is required that the table contains `time`\nand `diff` columns of the integer type - you can refer to the article\n\u201cStreams of Updates and Snapshots\u201d\nto understand the reasoning behind it.\n* Parameters\n * postgres_settings (`dict`) \u2013 Components of the connection string for Postgres.\n * table_name (`str`) \u2013 Name of the target table.\n * primary_key (`list`\\[`str`\\]) \u2013 
Names of the fields which serve as a primary key in the Postgres table.\n * max_batch_size (`Optional`\\[`int`\\]) \u2013 Maximum number of entries allowed to be committed within a single transaction.\n* Returns\n None\nExample:\nConsider there is a table `stats` in Pathway, containing the average number of requests to some\nservice or operation per user, over some period of time. The number of requests\ncan be large, so we decide not to store the whole stream of changes, but to only store\na snapshot of the data, which can be actualized by Pathway.\nThe minimum set-up would require us to have a Postgres table with two columns: the ID\nof the user `user_id` and the number of requests across some period of time `number_of_requests`.\nIn order to maintain consistency, we also need two extra columns: `time` and `diff`.\nThe SQL for the creation of such table would look as follows:\n```sql\nCREATE TABLE user_stats (\n user_id TEXT PRIMARY KEY,\n number_of_requests INTEGER,\n time INTEGER NOT NULL,\n diff INTEGER NOT NULL\n);\n```\nAfter the table is created, all you need is just to set up the output connector:\n```python\nimport pathway as pw\npw.io.postgres.write_snapshot( \n stats,\n {\n \"host\": \"localhost\",\n \"port\": \"5432\",\n \"dbname\": \"database\",\n \"user\": \"user\",\n \"password\": \"pass\",\n },\n \"user_stats\",\n [\"user_id\"],\n)\n```\n"} -{"doc": "---\ntitle: pathway.io.jsonlines package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.jsonlines package\nFunctions\npw.io.jsonlines.read(path, *, schema=None, mode='streaming', json_field_paths=None, object_pattern='*', with_metadata=False, autocommit_duration_ms=1500, persistent_id=None, debug_data=None, value_columns=None, primary_key=None, types=None, default_values=None)\nReads a table from one or several files in jsonlines format.\nIn case the folder is passed to the engine, the order in which files from\nthe directory are processed is determined according to the modification time 
of\nfiles within this folder: they will be processed by ascending order of\nthe modification time.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to the file or to the folder with files.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 denotes how the engine polls the new data from the source. Currently \u201cstreaming\u201d, \u201cstatic\u201d, and \u201cstreaming_with_deletions\u201d are supported. If set to \u201cstreaming\u201d the engine will wait for the new input files in the directory. On the other hand, \u201cstreaming_with_deletions\u201d mode also tracks file deletions and modifications and reflects them in the state. For example, if a file was deleted, \u201cstreaming_with_deletions\u201dmode will also remove rows obtained by reading this file from the table. Finally, the \u201cstatic\u201d mode will only consider the available data and ingest all of it in one commit. The default value is \u201cstreaming\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 This field allows to map field names into path in the field.\n For the field which require such mapping, it should be given in the format\n `: `, where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * object_pattern (`str`) \u2013 Unix shell style pattern for filtering only certain files in the directory. Ignored in case a path to a single file is specified.\n * with_metadata (`bool`) \u2013 When set to true, the connector will add an additional column named `_metadata` to the table. This column will be a JSON field that will contain two optional fields - `created_at` and `modified_at`. These fields will have integral UNIX timestamps for the creation and modification time respectively. Additionally, the column will also have an optional field named `owner` that will contain the name of the file owner (applicable only for Un). 
Finally, the column will also contain a field named `path` that will show the full path to the file from where a row was filled.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Names of the columns to be extracted from the files. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. 
\\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nExample:\nConsider you want to read a dataset, stored in the filesystem in a jsonlines\nformat. The dataset contains data about pets and their owners.\nFor the sake of demonstration, you can prepare a small dataset by creating a jsonlines\nfile via a unix command line tool:\n```bash\nprintf \"{\\\"id\\\":1,\\\"owner\\\":\\\"Alice\\\",\\\"pet\\\":\\\"dog\\\"}\n{\\\"id\\\":2,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"dog\\\"}\n{\\\"id\\\":3,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"cat\\\"}\n{\\\"id\\\":4,\\\"owner\\\":\\\"Bob\\\",\\\"pet\\\":\\\"cat\\\"}\" > dataset.jsonlines\n```\nIn order to read it into Pathway\u2019s table, you can first do the import and then\nuse the `pw.io.jsonlines.read` method:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.jsonlines.read(\"dataset.jsonlines\", schema=InputSchema, mode=\"static\")\n```\nThen, you can output the table in order to check the correctness of the read:\nCode\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner | pet\nAlice | dog\nBob | dog\nBob | cat\nBob | cat\n```\n::\n::\nNow let\u2019s try something different. Consider you have site access logs stored in a\nseparate folder in several files. 
For the sake of simplicity, a log entry contains\nan access ID, an IP address and the login of the user.\nA dataset, corresponding to the format described above can be generated, thanks to the\nfollowing set of unix commands:\n```bash\nmkdir logs\nprintf \"{\\\"id\\\":1,\\\"ip\\\":\\\"127.0.0.1\\\",\\\"login\\\":\\\"alice\\\"}\n{\\\"id\\\":2,\\\"ip\\\":\\\"8.8.8.8\\\",\\\"login\\\":\\\"alice\\\"}\" > logs/part_1.jsonlines\nprintf \"{\\\"id\\\":3,\\\"ip\\\":\\\"8.8.8.8\\\",\\\"login\\\":\\\"bob\\\"}\n{\\\"id\\\":4,\\\"ip\\\":\\\"127.0.0.1\\\",\\\"login\\\":\\\"alice\\\"}\" > logs/part_2.jsonlines\n```\nNow, let\u2019s see how you can use the connector in order to read the content of this\ndirectory into a table:\n```python\nclass InputSchema(pw.Schema):\n ip: str\n login: str\nt = pw.io.jsonlines.read(\"logs/\", schema=InputSchema, mode=\"static\")\n```\nThe only difference is that you specified the name of the directory instead of the\nfile name, as opposed to what you had done in the previous example. It\u2019s that simple!\nBut what if you are working with a real-time system, which generates logs all the time.\nThe logs are being written and after a while they get into the log directory (this is\nalso called \u201clogs rotation\u201d). Now, consider that there is a need to fetch the new files\nfrom this logs directory all the time. Would Pathway handle that? Sure!\nThe only difference would be in the usage of `mode` flag. So the code\nsnippet will look as follows:\n```python\nclass InputSchema(pw.Schema):\n ip: str\n login: str\nt = pw.io.jsonlines.read(\"logs/\", schema=InputSchema, mode=\"streaming\")\n```\nWith this method, you obtain a table updated dynamically. The changes in the logs would incur\nchanges in the Business-Intelligence \u2018BI\u2019-ready data, namely, in the tables you would like to output. 
To see\nhow these changes are reported by Pathway, have a look at the\n\u201cStreams of Updates and Snapshots\u201d\narticle.\npw.io.jsonlines.write(table, filename)\nWrites `table`\u2019s stream of updates to a file in jsonlines format.\n* Parameters\n * table (`Table`) \u2013 Table to be written.\n * filename (`str` | `PathLike`) \u2013 Path to the target output file.\n* Returns\n None\nExample:\nIn this simple example you can see how table output works.\nFirst, import Pathway and create a table:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice dog \\n 2 9 Bob cat \\n 3 8 Alice cat\")\n```\nConsider you would want to output the stream of changes of this table. In order to do that\nyou simply do:\n```python\npw.io.jsonlines.write(t, \"table.jsonlines\")\n```\nNow, let\u2019s see what you have on the output:\n```bash\ncat table.jsonlines\n```\n```json\n{\"age\":10,\"owner\":\"Alice\",\"pet\":\"dog\",\"diff\":1,\"time\":0}\n{\"age\":9,\"owner\":\"Bob\",\"pet\":\"cat\",\"diff\":1,\"time\":0}\n{\"age\":8,\"owner\":\"Alice\",\"pet\":\"cat\",\"diff\":1,\"time\":0}\n```\nThe columns age, owner and pet clearly represent the data columns you have. The\ncolumn time represents the number of operations minibatch, in which each of the\nrows was read. In this example, since the data is static: you have 0. The diff is\nanother element of this stream of updates. In this context, it is 1 because all\nthree rows were read from the input. 
All in all, the extra information in `time` and\n`diff` columns - in this case - shows us that in the initial minibatch (`time = 0`),\nyou have read three rows and all of them were added to the collection (`diff = 1`).\n"} -{"doc": "Notes\nThe CSV files should follow standard CSV settings: the separator is \u2018,\u2019, the\nquotechar is \u2018\u201d\u2019, and there is no escape.\npw.demo.replay_csv_with_time(path, *, schema, time_column, unit='s', autocommit_ms=100, speedup=1)\nReplay a static CSV file as a data stream while respecting the time between updates, based on a timestamp column.\nThe timestamps in the file should be ordered positive integers.\n* Parameters\n * path (`str`) \u2013 Path to the file to stream.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * time_column (`str`) \u2013 Column containing the timestamps.\n * unit (`str`) \u2013 Unit of the timestamps. Only \u2018s\u2019, \u2018ms\u2019, \u2018us\u2019, and \u2018ns\u2019 are supported. Defaults to \u2018s\u2019.\n * autocommit_duration_ms \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * speedup (`float`) \u2013 Produce stream speedup times faster than it would result from the time column.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "---\ntitle: pathway.demo package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.demo package\nPathway demo module\nTypical use:\n```python\nclass InputSchema(pw.Schema):\n name: str\n age: int\npw.demo.replay_csv(\"./input_stream.csv\", schema=InputSchema)\n```\n::\nResult\n```\n, 'age': }>\n```\n::\n::\nFunctions\npw.demo.generate_custom_stream(value_generators, *, schema, nb_rows=None, autocommit_duration_ms=1000, input_rate=1.0, persistent_id=None)\nGenerates a data stream.\nThe generator creates a table and periodically streams rows.\nIf a `nb_rows` value is provided, there are `nb_rows` row generated in total,\nelse the generator streams indefinitely.\nThe rows are generated iteratively and have an associated index x, starting from 0.\nThe values of each column are generated by their associated function in `value_generators`.\n* Parameters\n * value_generators (`dict`\\[`str`, `Any`\\]) \u2013 Dictionary mapping column names to functions that generate values for each column.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * nb_rows (`Optional`\\[`int`\\]) \u2013 The number of rows to generate. Defaults to None. If set to None, the generator\n generates streams indefinitely.\n * types \u2013 Dictionary containing the mapping between the columns and the data types (`pw.Type`) of the values of those columns. This parameter is optional, and if not provided the default type is `pw.Type.ANY`.\n * autocommit_duration_ms (`int`) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * input_rate (`float`) \u2013 The rate at which rows are generated per second. Defaults to 1.0.\n* Returns\n *Table* \u2013 The generated table.\nExample:\n"} -{"doc": "---\ntitle: pathway.demo package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.demo package\nPathway demo module\nTypical use:\n```python\nvalue_functions = {\n 'number': lambda x: x + 1,\n 'name': lambda x: f'Person {x}',\n 'age': lambda x: 20 + x,\n}\nclass InputSchema(pw.Schema):\n number: int\n name: str\n age: int\npw.demo.generate_custom_stream(value_functions, schema=InputSchema, nb_rows=10)\n```\n::\nResult\n```\n, 'name': , 'age': }>\n```\n::\n::\nIn the above example, a data stream is generated with 10 rows, where each row has columns \u2018number\u2019, \u2018name\u2019, and \u2018age\u2019.\nThe \u2018number\u2019 column contains values incremented by 1 from 1 to 10, the \u2018name\u2019 column contains \u2018Person\u2019\nfollowed by the respective row index, and the \u2018age\u2019 column contains values starting from 20 incremented by\nthe row index.\npw.demo.noisy_linear_stream(nb_rows=10, input_rate=1.0)\nGenerates an artificial data stream for the linear regression tutorial.\n* Parameters\n * nb_rows (*int, optional*) \u2013 The number of rows to generate in the data stream. Defaults to 10.\n * input_rate (*float, optional*) \u2013 The rate at which rows are generated per second. Defaults to 1.0.\n* Returns\n *pw.Table* \u2013 A table containing the generated data stream.\nExample:\n```python\ntable = pw.demo.noisy_linear_stream(nb_rows=100, input_rate=2.0)\n```\nIn the above example, an artificial data stream is generated with 100 rows. 
Each row has two columns, \u2018x\u2019 and \u2018y\u2019.\nThe \u2018x\u2019 values range from 0 to 99, and the \u2018y\u2019 values are equal to \u2018x\u2019 plus some random noise.\npw.demo.range_stream(nb_rows=30, offset=0, input_rate=1.0)\nGenerates a simple artificial data stream, used to compute the sum in our examples.\n* Parameters\n * nb_rows (*int, optional*) \u2013 The number of rows to generate in the data stream. Defaults to 30.\n * offset (*int, optional*) \u2013 The offset value added to the generated \u2018value\u2019 column. Defaults to 0.\n * input_rate (*float, optional*) \u2013 The rate at which rows are generated per second. Defaults to 1.0.\n* Returns\n *pw.Table* \u2013 a table containing the generated data stream.\nExample:\n```python\ntable = pw.demo.range_stream(nb_rows=50, offset=10, input_rate=2.5)\n```\nIn the above example, an artificial data stream is generated with a single column \u2018value\u2019 and 50 rows.\nThe \u2018value\u2019 column contains values ranging from \u2018offset\u2019 (10 in this case) to \u2018nb_rows\u2019 + \u2018offset\u2019 (60).\npw.demo.replay_csv(path, *, schema, input_rate=1.0)\nReplay a static CSV file as a data stream.\n* Parameters\n * path (`str` | `PathLike`) \u2013 Path to the file to stream.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema of the resulting table.\n * autocommit_duration_ms \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * input_rate (*float, optional*) \u2013 The rate at which rows are read per second. 
Defaults to 1.0.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "pathway.stdlib.temporal.utils module\npw.temporal.utils.check_joint_types(parameters)\nChecks if all parameters have types that allow to execute a function.\nIf parameters are {\u2018a\u2019: (a, TimeEventType), \u2018b\u2019: (b, IntervalType)} then\nthe following pairs of types are allowed for (a, b): (int, int), (float, float),\n(datetime.datetime, datetime.timedelta)\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n how=pw.JoinMode.LEFT,\n defaults={t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 6 | 11\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n```\n::\n::\nclass pw.temporal.AsofNowJoinResult(original_left, left, right, join_result, table_substitution, mode, id)\nResult of an asof now join between tables.\nselect(*args, kwargs)\nComputes a result of an asof now join.\n* Parameters\n * args (`ColumnReference`) \u2013 Column references.\n * kwargs 
(`ColumnExpression`) \u2013 Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nclass pw.temporal.CommonBehavior(delay, cutoff, keep_results)\nDefines temporal behavior of windows and temporal joins.\nclass pw.temporal.Direction(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.temporal.IntervalJoinResult(left_bucketed, right_bucketed, earlier_part_filtered, later_part_filtered, table_substitution, mode, _filter_out_results_of_forgetting)\nResult of an interval join between tables.\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\njoin_result = t1.interval_join_inner(t2, t1.t, t2.t, pw.temporal.interval(-2, 1))\nisinstance(join_result, pw.temporal.IntervalJoinResult)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\npw.debug.compute_and_print(\n join_result.select(left_t=t1.t, right_t=t2.t), include_id=False\n)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\nselect(*args, kwargs)\nComputes a result of an interval join.\n* Parameters\n * args (`ColumnReference`) \u2013 Column references.\n * kwargs (`Any`) \u2013 Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 
'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_inner(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\nclass pw.temporal.Window()\nclass pw.temporal.WindowJoinResult(join_result, left_original, right_original, left_new, right_new)\nResult of a window join between tables.\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\njoin_result = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.tumbling(2))\nisinstance(join_result, pw.temporal.WindowJoinResult)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\npw.debug.compute_and_print(\n join_result.select(left_t=t1.t, right_t=t2.t), 
include_id=False\n)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n1 |\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n13 |\n```\n::\n::\nselect(*args, kwargs)\nComputes a result of a window join.\n:type args: `ColumnReference`\n:param args: Column references.\n:type kwargs: `Any`\n:param kwargs: Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n>>> import pathway as pw\n>>> t1 = pw.debug.table_from_markdown(\n\u2026 \u2018\u2019\u2019\n\u2026 | a | t\n\u2026 1 | 1 | 1\n\u2026 2 | 1 | 2\n\u2026 3 | 1 | 3\n\u2026 4 | 1 | 7\n\u2026 5 | 1 | 13\n\u2026 6 | 2 | 1\n\u2026 7 | 2 | 2\n\u2026 8 | 3 | 4\n\u2026 \u2018\u2019\u2019\n\u2026 )\n>>> t2 = pw.debug.table_from_markdown(\n\u2026 \u2018\u2019\u2019\n\u2026 | b | t\n\u2026 1 | 1 | 2\n\u2026 2 | 1 | 5\n\u2026 3 | 1 | 6\n\u2026 4 | 1 | 7\n\u2026 5 | 2 | 2\n\u2026 6 | 2 | 3\n\u2026 7 | 4 | 3\n\u2026 \u2018\u2019\u2019\n\u2026 )\n>>> t3 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n\u2026 key=pw.coalesce(t1.a, t2.b), left_t=t1.t, right_t=t2.t\n\u2026 )\n>>> pw.debug.compute_and_print(t3, include_id=False)\nkey | left_t | right_t\n1 | | 5\n1 | 1 |\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n1 | 13 |\n2 | 1 |\n2 | 2 | 2\n2 | 2 | 3\n3 | 4 |\n4 | | 3\nFunctions\npw.temporal.asof_join(self, other, self_time, other_time, *on, how, defaults={}, direction=Direction.BACKWARD)\nPerform an ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * how (`JoinMode`) \u2013 mode of the join (LEFT, RIGHT, FULL)\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. If no\n default is provided, None will be used.\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n how=pw.JoinMode.LEFT,\n defaults={t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 6 | 11\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n```\n::\n::\npw.temporal.asof_join_left(self, other, self_time, other_time, *on, defaults={}, direction=Direction.BACKWARD)\nPerform a left ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column 
expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n do not have a predecessor in the join will be set to this default value. If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_left(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 6 | 11\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n```\n::\n::\npw.temporal.asof_join_outer(self, other, self_time, other_time, *on, 
defaults={}, direction=Direction.BACKWARD)\nPerform an outer ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_outer(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t1.val: -1, t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 
1 | -1 | 0\n0 | 2 | 1 | 0 | 1\n0 | 3 | 1 | 6 | 7\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 2 | 7\n0 | 7 | 5 | 6 | 11\n0 | 8 | 5 | 3 | 8\n0 | 9 | 5 | 9 | 14\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n0 | 13 | 7 | 7 | 14\n0 | 14 | 7 | 4 | 11\n1 | 2 | -1 | 7 | 6\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n1 | 8 | 9 | 3 | 12\n```\n::\n::\npw.temporal.asof_join_right(self, other, self_time, other_time, *on, defaults={}, direction=Direction.BACKWARD)\nPerform a right ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. 
If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_right(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t1.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 2 | 1 | 0 | 1\n0 | 3 | 1 | 6 | 7\n0 | 7 | 5 | 2 | 7\n0 | 8 | 5 | 3 | 8\n0 | 9 | 5 | 9 | 14\n0 | 13 | 7 | 7 | 14\n0 | 14 | 7 | 4 | 11\n1 | 2 | -1 | 7 | 6\n1 | 8 | 9 | 3 | 12\n```\n::\n::\npw.temporal.asof_now_join(self, other, *on, how=JoinMode.INNER, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. Rows from self are not stored.\nThey are joined with rows of other at their processing time. If other is updated\nin the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. 
They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n * how (`JoinMode`) \u2013 by default, inner join is performed. Possible values are JoinMode.{INNER,LEFT}\n which correspond to inner and left join respectively.\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\npw.temporal.asof_now_join_inner(self, other, *on, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. Rows from self are not stored.\nThey are joined with rows of other at their processing time. If other is updated\nin the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\npw.temporal.asof_now_join_left(self, other, *on, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. 
If there are no matching\nrows in other, missing values on the right side are replaced with None.\nRows from self are not stored. They are joined with rows of other at their processing\ntime. If other is updated in the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\npw.temporal.common_behavior(delay=None, cutoff=None, keep_results=True)\nCreates CommonBehavior\n* Parameters\n * delay (`Union`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 Optional; for windows, delays initial output by `delay` with respect to the\n beginning of the window. Setting it to `None` does not enable\n the delaying mechanism.\n For interval joins, it delays the time the record is joined by `delay`.\n Using delay is useful when updates are too frequent.\n * cutoff (`Union`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 Optional; for windows, stops updating windows which end earlier than maximal\n seen time minus `cutoff`. Setting cutoff to `None` does not enable\n the cutoff mechanism.\n For interval joins, it ignores entries that are older\n than maximal seen time minus `cutoff`. This parameter is also used to clear\n memory. It allows releasing memory used by entries that won\u2019t change.\n * keep_results (`bool`) \u2013 If set to True, keeps all results of the operator. 
If set to False,\n keeps only results that are newer than maximal seen time minus `cutoff`.\n Can\u2019t be set to `False`, when `cutoff` is `None`.\npw.temporal.interval(lower_bound, upper_bound)\nAllows testing whether two times are within a certain distance.\nNOTE: Usually used as an argument of .interval_join().\n* Parameters\n * lower_bound (`int` | `float` | `timedelta`) \u2013 a lower bound on other_time - self_time.\n * upper_bound (`int` | `float` | `timedelta`) \u2013 an upper bound on other_time - self_time.\n* Returns\n *Window* \u2013 object to pass as an argument to .interval_join()\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\npw.temporal.interval_join(self, other, self_time, other_time, interval, *on, behavior=None, how=JoinMode.INNER)\nPerforms an interval join of self with other using a time difference\nand join expressions. 
If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n * how (`JoinMode`) \u2013 decides whether to run interval_join_inner, interval_join_left, interval_join_right\n or interval_join_outer. Default is INNER.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b, how=pw.JoinMode.INNER\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\npw.temporal.interval_join_inner(self, other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval join of self with other using a time difference\nand join expressions. 
If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_inner(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = 
pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_inner(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\npw.temporal.interval_join_left(self, other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval left join of self with other using a time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows from the left\nside that haven\u2019t been matched with the right side are returned with missing\nvalues on the right side replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_left(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n11 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_left(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n1 | 11 |\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n3 | 4 |\n```\n::\n::\npw.temporal.interval_join_outer(self, other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval outer join of self with other using a time difference\nand join expressions. 
If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows that haven\u2019t\nbeen matched with the other side are returned with missing values on the other\nside replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_outer(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 0\n | 7\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n11 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_outer(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n | | 0\n | | 2\n | | 7\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n1 | 11 |\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n3 | 4 |\n```\n::\n::\npw.temporal.interval_join_right(self, other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval right join of self with other using a time difference\nand join expressions. 
If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows from the right\nside that haven\u2019t been matched with the left side are returned with missing\nvalues on the left side replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_right(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 0\n | 7\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_right(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n | | 0\n | | 2\n | | 7\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\npw.temporal.intervals_over(*, at, lower_bound, upper_bound, is_outer=True)\nAllows grouping together elements within a window.\nWindows are created for each time t in at, by taking values with times\nwithin \\[t+lower_bound, t+upper_bound\\].\nNote: If a tuple reducer will be 
used on grouped elements within a window, values\nin the tuple will be sorted according to their time column.\n* Parameters\n * lower_bound (`int` | `float` | `timedelta`) \u2013 lower bound for interval\n * upper_bound (`int` | `float` | `timedelta`) \u2013 upper bound for interval\n * at (`ColumnReference`) \u2013 column of times for which windows are to be created\n * is_outer (`bool`) \u2013 decides whether empty windows should return None or be omitted\n* Returns\n *Window* \u2013 object to pass as an argument to .windowby()\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n'''\n | t | v\n1 | 1 | 10\n2 | 2 | 1\n3 | 4 | 3\n4 | 8 | 2\n5 | 9 | 4\n6 | 10| 8\n7 | 1 | 9\n8 | 2 | 16\n''')\nprobes = pw.debug.table_from_markdown(\n'''\nt\n2\n4\n6\n8\n10\n''')\nresult = (\n pw.temporal.windowby(t, t.t, window=pw.temporal.intervals_over(\n at=probes.t, lower_bound=-2, upper_bound=1\n ))\n .reduce(pw.this._pw_window_location, v=pw.reducers.tuple(pw.this.v))\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\n_pw_window_location | v\n2 | (9, 10, 16, 1)\n4 | (16, 1, 3)\n6 | (3,)\n8 | (2, 4)\n10 | (2, 4, 8)\n```\n::\n::\npw.temporal.session(*, predicate=None, max_gap=None)\nAllows grouping together elements within a window across ordered time-like\ndata column by locally grouping adjacent elements either based on a maximum time\ndifference or using a custom predicate.\nNOTE: Usually used as an argument of .windowby().\nExactly one of the arguments predicate or max_gap should be provided.\n* Parameters\n * predicate (`Optional`\\[`Callable`\\[\\[`Any`, `Any`\\], `bool`\\]\\]) \u2013 function taking two adjacent entries that returns a boolean saying\n whether the two 
entries should be grouped\n * max_gap (`UnionType`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 Two adjacent entries will be grouped if b - a < max_gap\n* Returns\n *Window* \u2013 object to pass as an argument to .windowby()\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n'''\n | shard | t | v\n1 | 0 | 1 | 10\n2 | 0 | 2 | 1\n3 | 0 | 4 | 3\n4 | 0 | 8 | 2\n5 | 0 | 9 | 4\n6 | 0 | 10| 8\n7 | 1 | 1 | 9\n8 | 1 | 2 | 16\n''')\nresult = t.windowby(\n t.t, window=pw.temporal.session(predicate=lambda a, b: abs(a-b) <= 1), shard=t.shard\n).reduce(\npw.this._pw_shard,\npw.this._pw_window_start,\npw.this._pw_window_end,\nmin_t=pw.reducers.min(pw.this.t),\nmax_v=pw.reducers.max(pw.this.v),\ncount=pw.reducers.count(),\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\n_pw_shard | _pw_window_start | _pw_window_end | min_t | max_v | count\n0 | 1 | 2 | 1 | 10 | 2\n0 | 4 | 4 | 4 | 3 | 1\n0 | 8 | 10 | 8 | 8 | 3\n1 | 1 | 2 | 1 | 16 | 2\n```\n::\n::\npw.temporal.sliding(hop, duration=None, ratio=None, offset=None)\nAllows grouping together elements within a window of a given length sliding\nacross ordered time-like data column according to a specified interval (hop)\nstarting from a given offset.\nNOTE: Usually used as an argument of .windowby().\nExactly one of the arguments hop or ratio should be provided.\n* Parameters\n * hop (`int` | `float` | `timedelta`) \u2013 frequency of a window\n * duration (`UnionType`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 length of the window\n * ratio (`Optional`\\[`int`\\]) \u2013 used as an alternative way to specify duration as hop \\* ratio\n * offset (`UnionType`\\[`int`, `float`, `datetime`, `None`\\]) \u2013 beginning of the 
first window\n* Returns\n *Window* \u2013 object to pass as an argument to .windowby()\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n'''\n | shard | t\n 1 | 0 | 12\n 2 | 0 | 13\n 3 | 0 | 14\n 4 | 0 | 15\n 5 | 0 | 16\n 6 | 0 | 17\n 7 | 1 | 10\n 8 | 1 | 11\n''')\nresult = t.windowby(\n t.t, window=pw.temporal.sliding(duration=10, hop=3), shard=t.shard\n).reduce(\n pw.this._pw_shard,\n pw.this._pw_window_start,\n pw.this._pw_window_end,\n min_t=pw.reducers.min(pw.this.t),\n max_t=pw.reducers.max(pw.this.t),\n count=pw.reducers.count(),\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\n_pw_shard | _pw_window_start | _pw_window_end | min_t | max_t | count\n0 | 3 | 13 | 12 | 12 | 1\n0 | 6 | 16 | 12 | 15 | 4\n0 | 9 | 19 | 12 | 17 | 6\n0 | 12 | 22 | 12 | 17 | 6\n0 | 15 | 25 | 15 | 17 | 3\n1 | 3 | 13 | 10 | 11 | 2\n1 | 6 | 16 | 10 | 11 | 2\n1 | 9 | 19 | 10 | 11 | 2\n```\n::\n::\npw.temporal.tumbling(duration, offset=None)\nAllows grouping together elements within a window of a given length tumbling\nacross ordered time-like data column starting from a given offset.\nNOTE: Usually used as an argument of .windowby().\n* Parameters\n * duration (`int` | `float` | `timedelta`) \u2013 length of the window\n * offset (`UnionType`\\[`int`, `float`, `datetime`, `None`\\]) \u2013 beginning of the first window\n* Returns\n *Window* \u2013 object to pass as an argument to .windowby()\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as 
pw\nt = pw.debug.table_from_markdown(\n'''\n | shard | t\n 1 | 0 | 12\n 2 | 0 | 13\n 3 | 0 | 14\n 4 | 0 | 15\n 5 | 0 | 16\n 6 | 0 | 17\n 7 | 1 | 12\n 8 | 1 | 13\n''')\nresult = t.windowby(\n t.t, window=pw.temporal.tumbling(duration=5), shard=t.shard\n).reduce(\n pw.this._pw_shard,\n pw.this._pw_window_start,\n pw.this._pw_window_end,\n min_t=pw.reducers.min(pw.this.t),\n max_t=pw.reducers.max(pw.this.t),\n count=pw.reducers.count(),\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\n_pw_shard | _pw_window_start | _pw_window_end | min_t | max_t | count\n0 | 10 | 15 | 12 | 14 | 3\n0 | 15 | 20 | 15 | 17 | 3\n1 | 10 | 15 | 12 | 13 | 2\n```\n::\n::\npw.temporal.window_join(self, other, self_time, other_time, window, *on, how=JoinMode.INNER)\nPerforms a window join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * how (`JoinMode`) \u2013 decides whether to run window_join_inner, window_join_left, window_join_right\n or window_join_outer. Default is INNER.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt4 = t1.window_join(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 
3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, 
right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n```\n::\n::\npw.temporal.window_join_inner(self, other, self_time, other_time, window, *on)\nPerforms a window join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt4 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n 
key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_inner(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_inner(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n```\n::\n::\npw.temporal.window_join_left(self, other, self_time, other_time, window, *on)\nPerforms a window left join of self with other using 
a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from the left side that didn\u2019t match with any record on the right side in\na given window, are returned with missing values on the right side replaced\nwith None. The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_left(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 |\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt4 = t1.window_join_left(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 |\n1 | 2\n2 | 2\n2 | 2\n3 |\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n13 |\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_left(t2, t1.t, t2.t, 
pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 |\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n1 | 13 |\n2 | 1 |\n2 | 2 | 2\n2 | 2 | 3\n3 | 4 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_left(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n10 |\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_left(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n2 | 7 |\n3 | 4 
|\n```\n::\n::\npw.temporal.window_join_outer(self, other, self_time, other_time, window, *on)\nPerforms a window outer join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from both sides that didn\u2019t match with any record on the other side in\na given window, are returned with missing values on the other side replaced\nwith None. The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n1 |\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt4 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n | 5\n | 6\n1 |\n1 | 2\n2 | 2\n2 | 2\n3 |\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n13 |\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_outer(t2, t1.t, 
t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=pw.coalesce(t1.a, t2.b), left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | | 5\n1 | 1 |\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n1 | 13 |\n2 | 1 |\n2 | 2 | 2\n2 | 2 | 3\n3 | 4 |\n4 | | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_outer(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | -3\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n10 |\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_outer(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=pw.coalesce(t1.a, t2.b), left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | | 10\n2 | 0 | 
2\n2 | 3 | 2\n2 | 4 | 2\n2 | 7 |\n3 | 4 |\n4 | | 3\n```\n::\n::\npw.temporal.window_join_right(self, other, self_time, other_time, window, *on)\nPerforms a window right join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from the right side that didn\u2019t match with any record on the left side in\na given window, are returned with missing values on the left side replaced\nwith None. The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt4 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n | 5\n | 6\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.tumbling(2), 
t1.a == t2.b).select(\n key=t2.b, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | | 5\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n4 | | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_right(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | -3\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_right(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t2.b, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | | 10\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n4 | | 3\n```\n::\n::\npw.temporal.windowby(self, time_expr, *, window, behavior=None, 
shard=None)\nCreate a GroupedTable by windowing the table (based on expr and window),\noptionally sharded with shard\n* Parameters\n * time_expr (`ColumnExpression`) \u2013 Column expression used for windowing\n * window (`Window`) \u2013 type window to use\n * shard (`Optional`\\[`ColumnExpression`\\]) \u2013 optional column expression to act as a shard key\nExamples:\n"} -{"doc": "---\ntitle: pathway.stdlib.temporal package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.temporal package\nclass pw.temporal.AsofJoinResult(side_data, mode, defaults, direction)\nResult of an ASOF join of two tables\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n'''\n | shard | t | v\n1 | 0 | 1 | 10\n2 | 0 | 2 | 1\n3 | 0 | 4 | 3\n4 | 0 | 8 | 2\n5 | 0 | 9 | 4\n6 | 0 | 10| 8\n7 | 1 | 1 | 9\n8 | 1 | 2 | 16\n''')\nresult = t.windowby(\n t.t, window=pw.temporal.session(predicate=lambda a, b: abs(a-b) <= 1), shard=t.shard\n).reduce(\npw.this.shard,\nmin_t=pw.reducers.min(pw.this.t),\nmax_v=pw.reducers.max(pw.this.v),\ncount=pw.reducers.count(),\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nshard | min_t | max_v | count\n0 | 1 | 10 | 2\n0 | 4 | 3 | 1\n0 | 8 | 8 | 3\n1 | 1 | 16 | 2\n```\n::\n::"} -{"doc": "pathway.stdlib.temporal.temporal_behavior module\nclass pw.temporal.temporal_behavior.Behavior()\nA superclass of all classes defining temporal behavior.\nclass pw.temporal.temporal_behavior.CommonBehavior(delay, cutoff, keep_results)\nDefines temporal behavior of windows and temporal joins.\nclass pw.temporal.temporal_behavior.ExactlyOnceBehavior(shift)\npw.temporal.temporal_behavior.common_behavior(delay=None, cutoff=None, keep_results=True)\nCreates CommonBehavior\n* Parameters\n * delay (`Union`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 Optional; for windows, delays initial output by `delay` with respect to the\n beginning of the window. 
Setting it to `None` does not enable\n delaying mechanism.\n For interval joins, it delays the time the record is joined by `delay`.\n Using delay is useful when updates are too frequent.\n * cutoff (`Union`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 Optional; for windows, stops updating windows which end earlier than maximal\n seen time minus `cutoff`. Setting cutoff to `None` does not enable\n cutoff mechanism.\n For interval joins, it ignores entries that are older\n than maximal seen time minus `cutoff`. This parameter is also used to clear\n memory. It allows to release memory used by entries that won\u2019t change.\n * keep_results (`bool`) \u2013 If set to True, keeps all results of the operator. If set to False,\n keeps only results that are newer than maximal seen time minus `cutoff`.\n Can\u2019t be set to `False`, when `cutoff` is `None`.\npw.temporal.temporal_behavior.exactly_once_behavior(shift=None)\nCreates an instance of class ExactlyOnceBehavior, indicating that each non empty\nwindow should produce exactly one output.\n* Parameters\n * shift (`Union`\\[`int`, `float`, `timedelta`, `None`\\]) \u2013 optional, defines the moment in time (`window end + shift`) in which\n the window stops accepting the data and sends the results to the output.
Setting it to `None` is interpreted as `shift=0`.\nRemark:\n note that setting a non-zero shift and demanding exactly one output results in\n the output being delivered only when the time in the time column reaches\n `window end + shift`.\n"} -{"doc": "---\ntitle: pathway.stdlib.ordered package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ordered package\nFunctions\npw.ordered.diff(self, timestamp, *values)\nCompute the difference between the values in the `values` columns and the previous values\naccording to the order defined by the column `timestamp`.\n* Parameters\n * timestamp (*-*) \u2013 The column reference to the `timestamp` column on\n which the order is computed.\n * \\*values (*-*) \u2013 Variable-length argument representing the column\n references to the `values` columns.\n* Returns\n `Table` \u2013 A new table where each column is replaced with a new column containing\n the difference and whose name is the concatenation of diff_ and the former name.\n* Raises\n ValueError \u2013 If the columns are not ColumnReference.\nNOTE: * The value of the \u201cfirst\u201d value (the row with the lower value\n in the `timestamp` column) is `None`.\nExample:\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\ntimestamp | values\n1 | 1\n2 | 2\n3 | 4\n4 | 7\n5 | 11\n6 | 16\n''')\ntable += table.diff(pw.this.timestamp, pw.this.values)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ntimestamp | values | diff_values\n1 | 1 |\n2 | 2 | 1\n3 | 4 | 2\n4 | 7 | 3\n5 | 11 | 4\n6 | 16 | 5\n```\n::\n::"} -{"doc": "pathway.stdlib.ordered.diff module\npw.ordered.diff.diff(self, timestamp, *values)\nCompute the difference between the values in the `values` columns and the previous values\naccording to the order defined by the column `timestamp`.\n* Parameters\n * timestamp (*-*) \u2013 The column reference to the `timestamp` column on\n which the order is computed.\n * \\*values (*-*) \u2013 Variable-length 
argument representing the column\n references to the `values` columns.\n* Returns\n `Table` \u2013 A new table where each column is replaced with a new column containing\n the difference and whose name is the concatenation of diff_ and the former name.\n* Raises\n ValueError \u2013 If the columns are not ColumnReference.\nNOTE: * The value of the \u201cfirst\u201d value (the row with the lower value\n in the `timestamp` column) is `None`.\nExample:\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\ntimestamp | values\n1 | 1\n2 | 2\n3 | 4\n4 | 7\n5 | 11\n6 | 16\n''')\ntable += table.diff(pw.this.timestamp, pw.this.values)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ntimestamp | values | diff_values\n1 | 1 |\n2 | 2 | 1\n3 | 4 | 2\n4 | 7 | 3\n5 | 11 | 4\n6 | 16 | 5\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.io.s3 package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.s3 package\nclass pw.io.s3.AwsS3Settings(*, bucket_name=None, access_key=None, secret_access_key=None, with_path_style=False, region=None, endpoint=None)\nStores Amazon S3 connection settings. You may also use this class to store\nconfiguration settings for any custom S3 installation, however you will need to\nspecify the region and the endpoint.\n* Parameters\n * bucket_name \u2013 Name of S3 bucket.\n * access_key \u2013 Access key for the bucket.\n * secret_access_key \u2013 Secret access key for the bucket.\n * with_path_style \u2013 Whether to use path-style requests.\n * region \u2013 Region of the bucket.\n * endpoint \u2013 Custom endpoint in case of self-hosted storage.\nclassmethod new_from_path(s3_path)\nConstructs settings from S3 path. The engine will look for the credentials in\nenvironment variables and in local AWS profiles. It will also automatically\ndetect the region of the bucket.\nThis method may fail if there are no credentials or they are incorrect. 
It may\nalso fail if the bucket does not exist.\n* Parameters\n s3_path (`str`) \u2013 full path to the object in the form `s3://<bucket_name>/<path>`.\n* Returns\n Configuration object.\nclass pw.io.s3.DigitalOceanS3Settings(bucket_name, *, access_key=None, secret_access_key=None, region=None)\nStores Digital Ocean S3 connection settings.\n* Parameters\n * bucket_name \u2013 Name of Digital Ocean S3 bucket.\n * access_key \u2013 Access key for the bucket.\n * secret_access_key \u2013 Secret access key for the bucket.\n * region \u2013 Region of the bucket.\nclass pw.io.s3.WasabiS3Settings(bucket_name, *, access_key=None, secret_access_key=None, region=None)\nStores Wasabi S3 connection settings.\n* Parameters\n * bucket_name \u2013 Name of Wasabi S3 bucket.\n * access_key \u2013 Access key for the bucket.\n * secret_access_key \u2013 Secret access key for the bucket.\n * region \u2013 Region of the bucket.\nFunctions\npw.io.s3.read(path, format, *, aws_s3_settings=None, schema=None, mode='streaming', csv_settings=None, json_field_paths=None, persistent_id=None, autocommit_duration_ms=1500, debug_data=None)\nReads a table from one or several objects in Amazon S3 bucket in the given\nformat.\nIn case the prefix of S3 path is specified, and there are several objects lying\nunder this prefix, their order is determined according to their modification times:\nthe smaller the modification time is, the earlier the file will be passed to the\nengine.\n* Parameters\n * path (`str`) \u2013 Path to an object or to a folder of objects in Amazon S3 bucket.\n * aws_s3_settings (`Optional`\\[`AwsS3Settings`\\]) \u2013 Connection parameters for the S3 account and the bucket.\n * format (`str`) \u2013 Format of data to be read. 
Currently \u201ccsv\u201d, \u201cjson\u201d and \u201cplaintext\u201d\n formats are supported.\n * schema (`Optional`\\[`type`\\[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new objects under the\n given path prefix. Set it to \u201cstatic\u201d, it will only consider the available\n data and ingest all of it. Default value is \u201cstreaming\u201d.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser. This parameter is used only in case\n the specified format is \u201ccsv\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is \u201cjson\u201d, this field allows to map field names\n into path in the read json object. For the field which require such mapping,\n it should be given in the format `<field_name>: <path to be mapped>`,\n where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data (`Any`) \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\nLet\u2019s consider an object store, which is hosted in Amazon S3. The store contains\ndatasets in the respective bucket and is located in the region eu-west-3. 
The goal\nis to read the dataset, located under the path `animals/` in this bucket.\nLet\u2019s suppose that the format of the dataset rows is jsonlines.\nThen, the code may look as follows:\n```python\nimport os\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.s3.read(\n \"animals/\",\n aws_s3_settings=pw.io.s3.AwsS3Settings(\n bucket_name=\"datasets\",\n region=\"eu-west-3\",\n access_key=os.environ[\"S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"S3_SECRET_ACCESS_KEY\"],\n ),\n format=\"json\",\n schema=InputSchema,\n)\n```\nIn case you are dealing with a public bucket, the parameters `access_key` and\n`secret_access_key` can be omitted. In this case, the read part will look as\nfollows:\n```python\nt = pw.io.s3.read(\n \"animals/\",\n aws_s3_settings=pw.io.s3.AwsS3Settings(\n bucket_name=\"datasets\",\n region=\"eu-west-3\",\n ),\n format=\"json\",\n schema=InputSchema,\n)\n```\npw.io.s3.read_from_digital_ocean(path, do_s3_settings, format, *, schema=None, mode='streaming', csv_settings=None, json_field_paths=None, persistent_id=None, autocommit_duration_ms=1500, debug_data=None)\nReads a table from one or several objects in Digital Ocean S3 bucket.\nIn case the prefix of S3 path is specified, and there are several objects lying\nunder this prefix, their order is determined according to their modification times:\nthe smaller the modification time is, the earlier the file will be passed to the\nengine.\n* Parameters\n * path (`str`) \u2013 Path to an object or to a folder of objects in S3 bucket.\n * do_s3_settings (`DigitalOceanS3Settings`) \u2013 Connection parameters for the account and the bucket.\n * format (`str`) \u2013 Format of data to be read. 
Currently \u201ccsv\u201d, \u201cjson\u201d and \u201cplaintext\u201d\n formats are supported.\n * schema (`Optional`\\[`type`\\[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new objects under the\n given path prefix. Set it to \u201cstatic\u201d, it will only consider the available\n data and ingest all of it. Default value is \u201cstreaming\u201d.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser. This parameter is used only in case\n the specified format is \u201ccsv\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is \u201cjson\u201d, this field allows to map field names\n into path in the read json object. For the field which require such mapping,\n it should be given in the format `<field_name>: <path to be mapped>`,\n where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data (`Any`) \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\nLet\u2019s consider an object store, which is hosted in Digital Ocean S3. 
The store\ncontains CSV datasets in the respective bucket and is located in the region ams3.\nThe goal is to read the dataset, located under the path `animals/` in this bucket.\nThen, the code may look as follows:\n```python\nimport os\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.s3.read_from_digital_ocean(\n \"animals/\",\n do_s3_settings=pw.io.s3.DigitalOceanS3Settings(\n bucket_name=\"datasets\",\n region=\"ams3\",\n access_key=os.environ[\"DO_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"DO_S3_SECRET_ACCESS_KEY\"],\n ),\n format=\"csv\",\n schema=InputSchema,\n)\n```\npw.io.s3.read_from_wasabi(path, wasabi_s3_settings, format, *, schema=None, mode='streaming', csv_settings=None, json_field_paths=None, persistent_id=None, autocommit_duration_ms=1500, debug_data=None)\nReads a table from one or several objects in Wasabi S3 bucket.\nIn case the prefix of S3 path is specified, and there are several objects lying under\nthis prefix, their order is determined according to their modification times: the\nsmaller the modification time is, the earlier the file will be passed to the engine.\n* Parameters\n * path (`str`) \u2013 Path to an object or to a folder of objects in S3 bucket.\n * wasabi_s3_settings (`WasabiS3Settings`) \u2013 Connection parameters for the account and the bucket.\n * format (`str`) \u2013 Format of data to be read. Currently \u201ccsv\u201d, \u201cjson\u201d and \u201cplaintext\u201d\n formats are supported.\n * schema (`Optional`\\[`type`\\[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new objects under the\n given path prefix. Set it to \u201cstatic\u201d, it will only consider the available\n data and ingest all of it. Default value is \u201cstreaming\u201d.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 Settings for the CSV parser. 
This parameter is used only in case\n the specified format is \u201ccsv\u201d.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is \u201cjson\u201d, this field allows to map field names\n into path in the read json object. For the field which require such mapping,\n it should be given in the format `: `,\n where the path to be mapped needs to be a\n JSON Pointer (RFC 6901).\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data (`Any`) \u2013 Static data replacing original one when debug mode is active.\n* Returns\n *Table* \u2013 The table read.\nExample:\nLet\u2019s consider an object store, which is hosted in Wasabi S3. The store\ncontains CSV datasets in the respective bucket and is located in the region us-west-1.\nThe goal is to read the dataset, located under the path `animals/` in this bucket.\nThen, the code may look as follows:\n```python\nimport os\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.s3.read_from_wasabi(\n \"animals/\",\n wasabi_s3_settings=pw.io.s3.WasabiS3Settings(\n bucket_name=\"datasets\",\n region=\"us-west-1\",\n access_key=os.environ[\"WASABI_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"WASABI_S3_SECRET_ACCESS_KEY\"],\n ),\n format=\"csv\",\n schema=InputSchema,\n)\n```\n"} -{"doc": "---\ntitle: pathway.persistence package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.persistence package\nclass pw.persistence.Backend(engine_data_storage, fs_path=None)\nThe settings of a backend, which is used to persist the computation state. 
There\nare two kinds of data backends: metadata backend and snapshot backend. Both are\nconfigurable via this class.\nclassmethod filesystem(path)\nConfigure the filesystem backend.\n* Parameters\n path (`str` | `PathLike`\\[`str`\\]) \u2013 the path to the root directory in the file system, which will be used to store the persisted data.\n* Returns\n Class instance denoting the filesystem storage backend with root directory at `path`.\nclassmethod s3(root_path, bucket_settings)\nConfigure the S3 backend.\n* Parameters\n * root_path (`str`) \u2013 path to the root in the S3 storage, which will be used to store persisted data;\n * bucket_settings (`AwsS3Settings`) \u2013 the settings for S3 bucket connection in the same format as they are used by S3 connectors.\n* Returns\n Class instance denoting the S3 storage backend with root directory as\n `root_path` and connection settings given by `bucket_settings`.\nclass pw.persistence.Config(*, snapshot_interval_ms=0, metadata_storage, snapshot_storage, snapshot_access, replay_mode, continue_after_replay)\nConfigure the data persistence. 
An instance of this class should be passed as a\nparameter to pw.run in case persistence is enabled.\nPlease note that if you\u2019d like to use the same backend for both metadata and\nsnapshot storages, you can use the convenience method `simple_config`.\n* Parameters\n * metadata_storage (`Backend`) \u2013 metadata backend configuration;\n * snapshot_storage (`Backend`) \u2013 snapshots backend configuration;\n * snapshot_interval_ms (`int`) \u2013 the desired duration between snapshot updates in milliseconds;\nclassmethod simple_config(backend, snapshot_interval_ms=0, snapshot_access=, replay_mode=, continue_after_replay=True)\nConstruct config from a single instance of the `Backend` class, using this backend to persist metadata and snapshot.\n* Parameters\n * backend (`Backend`) \u2013 storage backend settings;\n * snapshot_interval_ms \u2013 the desired freshness of the persisted snapshot in milliseconds. The greater the value is, the more the amount of time that the snapshot may fall behind, and the less computational resources are required.\n* Returns\n Persistence config.\n"} -{"doc": "---\ntitle: pathway.stdlib.graphs.bellman_ford package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.graphs.bellman_ford package\nclass pw.graphs.bellman_ford.DistFromSource()\nclass pw.graphs.bellman_ford.Vertex()\n"} -{"doc": "pathway.stdlib.graphs.bellman_ford.impl module\nclass pw.graphs.bellman_ford.impl.Dist()\nclass pw.graphs.bellman_ford.impl.DistFromSource()\nclass pw.graphs.bellman_ford.impl.Vertex()\n"} -{"doc": "---\ntitle: pathway.io.kafka package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.kafka package\nFunctions\npw.io.kafka.read(rdkafka_settings, topic=None, *, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None, value_columns=None, primary_key=None, types=None, default_values=None, kwargs)\nGeneralized method to read the data from the given topic in 
Kafka.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of librdkafka.\n * topic (`UnionType`\\[`str`, `list`\\[`str`\\], `None`\\]) \u2013 Name of topic in Kafka from which the data should be read.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the field which require such mapping, it should be\n given in the format `: `, where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will\n be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. 
This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table, required for format other than\n \u201craw\u201d. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n Otherwise, the primary key will be generated randomly.\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n Otherwise, the primary key will be generated randomly.\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named `data`.\nFor other formats, the argument value_column is required and defines the columns.\nExample:\nConsider there is a queue in Kafka, running locally on port 9092. Our queue can\nuse SASL-SSL authentication over a SCRAM-SHA-256 mechanism. You can set up a queue\nwith similar parameters in Upstash. 
Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"sasl_ssl\",\n \"sasl.mechanism\": \"SCRAM-SHA-256\",\n \"group.id\": \"$GROUP_NAME\",\n \"session.timeout.ms\": \"60000\",\n \"sasl.username\": os.environ[\"KAFKA_USERNAME\"],\n \"sasl.password\": os.environ[\"KAFKA_PASSWORD\"]\n}\n```\nTo connect to the topic \u201canimals\u201d and accept messages, the connector must be used as follows, depending on the format:\nRaw version:\n```python\nimport pathway as pw\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"raw\",\n)\n```\nAll the data will be accessible in the column data.\nCSV version:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"csv\",\n schema=InputSchema\n)\n```\nIn case of CSV format, the first message must be the header:\n```csv\nowner,pet\n```\nThen, simple data rows are expected. 
For example:\n```csv\nAlice,cat\nBob,dog\n```\nThis way, you get a table which looks as follows:\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice cat\n Bob dog\n```\n::\n::\nJSON version:\n```python\nimport pathway as pw\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"json\",\n schema=InputSchema,\n)\n```\nFor the JSON connector, you can send these two messages:\n```json\n{\"owner\": \"Alice\", \"pet\": \"cat\"}\n{\"owner\": \"Bob\", \"pet\": \"dog\"}\n```\nThis way, you get a table which looks as follows:\n"} -{"doc": "---\ntitle: pathway.io.kafka package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.kafka package\nFunctions\npw.io.kafka.read(rdkafka_settings, topic=None, *, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None, value_columns=None, primary_key=None, types=None, default_values=None, kwargs)\nGeneralized method to read the data from the given topic in Kafka.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of librdkafka.\n * topic (`UnionType`\\[`str`, `list`\\[`str`\\], `None`\\]) \u2013 Name of topic in Kafka from which the data should be read.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the field which require such mapping, it should be\n given in the format `: `, where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will\n be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table, required for format other than\n \u201craw\u201d. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. 
This parameter is optional, and if not\n Otherwise, the primary key will be generated randomly.\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n Otherwise, the primary key will be generated randomly.\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named `data`.\nFor other formats, the argument value_column is required and defines the columns.\nExample:\nConsider there is a queue in Kafka, running locally on port 9092. Our queue can\nuse SASL-SSL authentication over a SCRAM-SHA-256 mechanism. You can set up a queue\nwith similar parameters in Upstash. Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"sasl_ssl\",\n \"sasl.mechanism\": \"SCRAM-SHA-256\",\n \"group.id\": \"$GROUP_NAME\",\n \"session.timeout.ms\": \"60000\",\n \"sasl.username\": os.environ[\"KAFKA_USERNAME\"],\n \"sasl.password\": os.environ[\"KAFKA_PASSWORD\"]\n}\n```\nTo connect to the topic \u201canimals\u201d and accept messages, the connector must be used as follows, depending on the format:\nRaw version:\n```python\nimport pathway as pw\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"raw\",\n)\n```\nAll the data will be accessible in the column data.\nCSV version:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"csv\",\n schema=InputSchema\n)\n```\nIn case of CSV format, the first message must be the 
header:\n```csv\nowner,pet\n```\nThen, simple data rows are expected. For example:\n```csv\nAlice,cat\nBob,dog\n```\nThis way, you get a table which looks as follows:\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice cat\n Bob dog\n```\n::\n::\nNow consider that the data about pets come in a more sophisticated way. For instance\nyou have an owner, kind and name of an animal, along with some physical measurements.\nThe JSON payload in this case may look as follows:\n```json\n{\n \"name\": \"Jack\",\n \"pet\": {\n \"animal\": \"cat\",\n \"name\": \"Bob\",\n \"measurements\": [100, 200, 300]\n }\n}\n```\nSuppose you need to extract a name of the pet and the height, which is the 2nd\n(1-based) or the 1st (0-based) element in the array of measurements. Then, you\nuse JSON Pointer and do a connector, which gets the data as follows:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n pet_name: str\n pet_height: int\nt = pw.io.kafka.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"json\",\n schema=InputSchema,\n json_field_paths={\n \"pet_name\": \"/pet/name\",\n \"pet_height\": \"/pet/measurements/1\"\n },\n)\n```\npw.io.kafka.read_from_upstash(endpoint, username, password, topic, *, read_only_new=False, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None)\nSimplified method to read data from Kafka instance hosted in Upstash. 
It requires\nendpoint address and topic along with credentials.\nRead starts from the beginning of the topic, unless the read_only_new parameter is\nset to True.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * endpoint (`str`) \u2013 Upstash endpoint for the sought queue, which can be found on \u201cDetails\u201d page.\n * username (`str`) \u2013 Username generated for this queue.\n * password (`str`) \u2013 Password generated for this queue. These credentials are also available on \u201cDetails\u201d page.\n * topic (`str`) \u2013 Name of topic in Kafka from which the data should be read.\n * read_only_new (`bool`) \u2013 If set to True only the entries which appear after the start of the program will be read. Otherwise, the read will be done from the beginning of thetopic.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 The maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the fields which require such mapping, it should be\n given in the format `: `, where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. 
This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will\n be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named `data`.\nExample:\nConsider that there is a queue running in Upstash. Let\u2019s say the endpoint name is\n\u201chttps://example-endpoint.com:19092\u201d, topic is \u201ctest-topic\u201d and the credentials are\nstored in environment variables.\nSuppose that we need just to read the raw messages for the further processing. Then\nit can be done in the following way:\n```python\nimport os\nimport pathway as pw\nt = pw.io.kafka.read_from_upstash(\n endpoint=\"https://example-endpoint.com:19092\",\n topic=\"test-topic\",\n username=os.environ[\"KAFKA_USERNAME\"],\n password=os.environ[\"KAFKA_PASSWORD\"],\n)\n```\npw.io.kafka.simple_read(server, topic, *, read_only_new=False, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None)\nSimplified method to read data from Kafka. Only requires the server address and\nthe topic name. 
If you have any kind of authentication or require fine-tuning of the\nparameters, please use read method.\nRead starts from the beginning of the topic, unless the read_only_new parameter is\nset to True.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * server (`str`) \u2013 Address of the server.\n * topic (`str`) \u2013 Name of topic in Kafka from which the data should be read.\n * read_only_new (`bool`) \u2013 If set to True only the entries which appear after the start of the program will be read. Otherwise, the read will be done from the beginning of thetopic.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 The maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the fields which require such mapping, it should be\n given in the format `: `, where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. 
This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will\n be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named `data`.\nFor other formats, the argument value_column is required and defines the columns.\nExample:\nConsider that there\u2019s a Kafka queue running locally on the port 9092 and we need\nto read raw messages from the topic \u201ctest-topic\u201d. Then, it can be done in the\nfollowing way:\n```python\nimport pathway as pw\nt = pw.io.kafka.simple_read(\"localhost:9092\", \"test-topic\")\n```\npw.io.kafka.write(table, rdkafka_settings, topic_name, *, format='json', delimiter=',', kwargs)\nWrite a table to a given topic on a Kafka instance.\n* Parameters\n * table (`Table`) \u2013 the table to output.\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of\n librdkafka.\n * topic_name (`str`) \u2013 name of topic in Kafka to which the data should be sent.\n * format (`str`) \u2013 format of the input data, currently \u201cjson\u201d and \u201cdsv\u201d are supported.\n * delimiter (`str`) \u2013 field delimiter to be used in case of delimiter-separated values\n format.\n* Returns\n None\nLimitations:\nFor future proofing, the format is configurable, but (for now) only JSON is available.\nExample:\nConsider there is a queue in Kafka, running locally on port 9092. 
Our queue can\nuse SASL-SSL authentication over a SCRAM-SHA-256 mechanism. You can set up a queue\nwith similar parameters in Upstash. Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"sasl_ssl\",\n \"sasl.mechanism\": \"SCRAM-SHA-256\",\n \"sasl.username\": os.environ[\"KAFKA_USERNAME\"],\n \"sasl.password\": os.environ[\"KAFKA_PASSWORD\"]\n}\n```\nYou want to send a Pathway table t to the Kafka instance.\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice dog \\n 2 9 Bob cat \\n 3 8 Alice cat\")\n```\nTo connect to the topic \u201canimals\u201d and send messages, the connector must be used as follows, depending on the format:\nJSON version:\n```python\npw.io.kafka.write(\n t,\n rdkafka_settings,\n \"animals\",\n format=\"json\",\n)\n```\nAll the updates of table t will be sent to the Kafka instance.\n"} -{"doc": "---\ntitle: pathway.io.python package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.python package\nclass pw.io.python.ConnectorSubject()\nAn abstract class allowing to create custom python connectors.\nCustom python connector can be created by extending this class and implementing\n`run()` function responsible for filling the buffer with data.\nThis function will be started by pathway engine in a separate thread.\nIn order to send a message one of the methods\n`next_json()`, `next_str()`, `next_bytes()` can be used.\nclose()\nSends a sentinel message.\nShould be called to indicate that no new messages will be sent.\ncommit()\nSends a commit message.\nnext_bytes(message)\nSends a message.\n* Parameters\n message (`bytes`) \u2013 bytes encoded json string.\nnext_json(message)\nSends a message.\n* Parameters\n message (`dict`) \u2013 Dict representing json.\nnext_str(message)\nSends a message.\n* Parameters\n message (`str`) \u2013 json string.\non_stop()\nCalled after the end of the `run()` 
function.\nstart()\nRuns a separate thread with function feeding data into buffer.\nShould not be called directly.\nFunctions\npw.io.python.read(subject, *, schema=None, format='json', autocommit_duration_ms=1500, debug_data=None, value_columns=None, primary_key=None, types=None, default_values=None, persistent_id=None)\nReads a table from a ConnectorSubject.\n* Parameters\n * subject (`ConnectorSubject`) \u2013 An instance of a `ConnectorSubject`.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format (`str`) \u2013 Format of the data produced by a subject, \u201cjson\u201d, \u201craw\u201d or \u201cbinary\u201d. In case of\n a \u201craw\u201d format, table with single \u201cdata\u201d column will be produced.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`int`) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. 
The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table will be persisted or `None`, if there is no need to persist the state of this table. When a program restarts, it restores the state for all input tables according to what was saved for their `persistent_id`. This way it\u2019s possible to configure the start of computations from the moment they were terminated last time.\n* Returns\n *Table* \u2013 The table read.\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nisinstance(t1, pw.Table)\n```\n::\nResult\n```\nTrue\n```\n::\n::\nproperty C(: ColumnNamespace )\nReturns the namespace of all the columns of a joinable.\nAllows accessing column names that might otherwise be a reserved methods.\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\ntab = pw.debug.table_from_markdown('''\nage | owner | pet | filter\n10 | Alice | dog | True\n9 | Bob | dog | True\n8 | Alice | cat | False\n7 | Bob | dog | True\n''')\nisinstance(tab.C.age, pw.ColumnReference)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Table 
API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\npw.debug.compute_and_print(tab.filter(tab.C.filter), include_id=False)\n```\n::\nResult\n```\nage | owner | pet | filter\n7 | Bob | dog | True\n9 | Bob | dog | True\n10 | Alice | dog | True\n```\n::\n::\nasof_join(other, self_time, other_time, *on, how, defaults={}, direction=Direction.BACKWARD)\nPerform an ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * how (`JoinMode`) \u2013 mode of the join (LEFT, RIGHT, FULL)\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. 
If no\n default is provided, None will be used.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n how=pw.JoinMode.LEFT,\n defaults={t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 6 | 11\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n```\n::\n::\nasof_join_left(other, self_time, other_time, *on, defaults={}, direction=Direction.BACKWARD)\nPerform a left ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_left(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 6 | 11\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n```\n::\n::\nasof_join_outer(other, self_time, other_time, *on, defaults={}, direction=Direction.BACKWARD)\nPerform an outer ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 
Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_outer(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t1.val: -1, t2.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 1 | 1 | -1 | 0\n0 | 2 | 1 | 0 | 1\n0 | 3 | 1 | 
6 | 7\n0 | 4 | 2 | 6 | 8\n0 | 5 | 3 | 6 | 9\n0 | 6 | 4 | 6 | 10\n0 | 7 | 5 | 2 | 7\n0 | 7 | 5 | 6 | 11\n0 | 8 | 5 | 3 | 8\n0 | 9 | 5 | 9 | 14\n0 | 11 | 6 | 9 | 15\n0 | 12 | 7 | 9 | 16\n0 | 13 | 7 | 7 | 14\n0 | 14 | 7 | 4 | 11\n1 | 2 | -1 | 7 | 6\n1 | 5 | 8 | 7 | 15\n1 | 7 | 9 | 7 | 16\n1 | 8 | 9 | 3 | 12\n```\n::\n::\nasof_join_right(other, self_time, other_time, *on, defaults={}, direction=Direction.BACKWARD)\nPerform a right ASOF join of two tables.\n* Parameters\n * other (`Table`) \u2013 Table to join with self, both must contain a column val\n * self_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * other_time (`ColumnExpression`) \u2013 time-like column expression to do the join against\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * defaults (`dict`\\[`ColumnReference`, `Any`\\]) \u2013 dictionary column-> default value. Entries in the resulting table that\n not have a predecessor in the join will be set to this default value. 
If no\n default is provided, None will be used.\n * direction (`Direction`) \u2013 direction of the join, accepted values: Direction.BACKWARD,\n Direction.FORWARD, Direction.NEAREST\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 1 | 0 | 1 | 1\n 2 | 0 | 2 | 4\n 3 | 0 | 3 | 5\n 4 | 0 | 4 | 6\n 5 | 0 | 5 | 7\n 6 | 0 | 6 | 11\n 7 | 0 | 7 | 12\n 8 | 1 | 8 | 5\n 9 | 1 | 9 | 7\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | K | val | t\n 21 | 1 | 7 | 2\n 22 | 1 | 3 | 8\n 23 | 0 | 0 | 2\n 24 | 0 | 6 | 3\n 25 | 0 | 2 | 7\n 26 | 0 | 3 | 8\n 27 | 0 | 9 | 9\n 28 | 0 | 7 | 13\n 29 | 0 | 4 | 14\n '''\n)\nres = t1.asof_join_right(\n t2,\n t1.t,\n t2.t,\n t1.K == t2.K,\n defaults={t1.val: -1},\n).select(\n pw.this.shard_key,\n pw.this.t,\n val_left=t1.val,\n val_right=t2.val,\n sum=t1.val + t2.val,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nshard_key | t | val_left | val_right | sum\n0 | 2 | 1 | 0 | 1\n0 | 3 | 1 | 6 | 7\n0 | 7 | 5 | 2 | 7\n0 | 8 | 5 | 3 | 8\n0 | 9 | 5 | 9 | 14\n0 | 13 | 7 | 7 | 14\n0 | 14 | 7 | 4 | 11\n1 | 2 | -1 | 7 | 6\n1 | 8 | 9 | 3 | 12\n```\n::\n::\nasof_now_join(other, *on, how=JoinMode.INNER, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. Rows from self are not stored.\nThey are joined with rows of other at their processing time. If other is updated\nin the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. 
They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n * how (`JoinMode`) \u2013 by default, inner join is performed. Possible values are JoinMode.{INNER,LEFT}\n which correspond to inner and left join respectively.\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nasof_now_join_inner(other, *on, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. Rows from self are not stored.\nThey are joined with rows of other at their processing time. If other is updated\nin the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nasof_now_join_left(other, *on, id=None)\nPerforms asof now join of self with other using join expressions. Each row of self\nis joined with rows from other at a given processing time. If there are no matching\nrows in other, missing values on the right side are replaced with None.\nRows from self are not stored. 
They are joined with rows of other at their processing\ntime. If other is updated in the future, rows from self from the past won\u2019t be updated.\nRows from other are stored. They can be joined with future rows of self.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *AsofNowJoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\ncast_to_types(kwargs)\nCasts columns to types.\nconcat(*others)\nConcats self with every other \u220a others.\nSemantics:\n- result.columns == self.columns == other.columns\n- result.id == self.id \u222a other.id\nif self.id and other.id collide, throws an exception.\nRequires:\n- other.columns == self.columns\n- self.id disjoint with other.id\n* Parameters\n other \u2013 the other table.\n* Returns\n *Table* \u2013 The concatenated table. 
Id\u2019s of rows from original tables are preserved.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n11 | 11 | Alice | 30\n12 | 12 | Tom | 40\n''')\npw.universes.promise_are_pairwise_disjoint(t1, t2)\nt3 = t1.concat(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n11 | Alice | 30\n12 | Tom | 40\n```\n::\n::\nconcat_reindex(*tables)\nConcatenate contents of several tables.\nThis is similar to PySpark union. All tables must have the same schema. Each row is reindexed.\n* Parameters\n tables (`Table`) \u2013 List of tables to concatenate. All tables must have the same schema.\n* Returns\n *Table* \u2013 The concatenated table. 
It will have new, synthetic ids.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | pet\n1 | Dog\n7 | Cat\n''')\nt2 = pw.debug.table_from_markdown('''\n | pet\n1 | Manul\n8 | Octopus\n''')\nt3 = t1.concat_reindex(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\npet\nCat\nDog\nManul\nOctopus\n```\n::\n::\ncopy()\nReturns a copy of a table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.copy()\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n7 | Bob | dog\n8 | Alice | cat\n9 | Bob | dog\n10 | Alice | dog\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 is t2\n```\n::\nResult\n```\nFalse\n```\n::\n::\ndiff(timestamp, *values)\nCompute the difference between the values in the `values` columns and the previous values\naccording to the order defined by the column 
`timestamp`.\n* Parameters\n * timestamp (*-*) \u2013 The column reference to the `timestamp` column on\n which the order is computed.\n * \\*values (*-*) \u2013 Variable-length argument representing the column\n references to the `values` columns.\n* Returns\n `Table` \u2013 A new table where each column is replaced with a new column containing\n the difference and whose name is the concatenation of diff_ and the former name.\n* Raises\n ValueError \u2013 If the columns are not ColumnReference.\nNOTE: * The value of the \u201cfirst\u201d value (the row with the lower value\n in the `timestamp` column) is `None`.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\ntimestamp | values\n1 | 1\n2 | 2\n3 | 4\n4 | 7\n5 | 11\n6 | 16\n''')\ntable += table.diff(pw.this.timestamp, pw.this.values)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ntimestamp | values | diff_values\n1 | 1 |\n2 | 2 | 1\n3 | 4 | 2\n4 | 7 | 3\n5 | 11 | 4\n6 | 16 | 5\n```\n::\n::\ndifference(other)\nRestrict self universe to keys not appearing in the other table.\n* Parameters\n other (`Table`) \u2013 table with ids to remove from self.\n* Returns\n *Table* \u2013 table with restricted universe, with the same set of columns\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = 
pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | cost\n2 | 100\n3 | 200\n4 | 300\n''')\nt3 = t1.difference(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n10 | Alice | 1\n```\n::\n::\nempty()\nCreates an empty table with a schema specified by kwargs.\n* Parameters\n kwargs (`DType`) \u2013 Dict whose keys are column names and values are column types.\n* Returns\n *Table* \u2013 Created empty table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.Table.empty(age=float, pet=float)\npw.debug.compute_and_print(t1, include_id=False)\n```\n::\nResult\n```\nage | pet\n```\n::\n::\nfilter(filter_expression)\nFilter a table according to filter condition.\n* Parameters\n filter \u2013 ColumnExpression that specifies the filtering condition.\n* Returns\n *Table* \u2013 Result has the same schema as self and its ids are subset of self.id.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nvertices = pw.debug.table_from_markdown('''\nlabel outdegree\n 1 3\n 7 0\n''')\nfiltered = vertices.filter(vertices.outdegree == 0)\npw.debug.compute_and_print(filtered, include_id=False)\n```\n::\nResult\n```\nlabel | outdegree\n7 | 0\n```\n::\n::\nflatten(*args, 
kwargs)\nPerforms a flatmap operation on a column or expression given as a first\nargument. Datatype of this column or expression has to be iterable.\nOther columns specified in the method arguments are duplicated\nas many times as the length of the iterable.\nIt is possible to get ids of source rows by using table.id column, e.g.\ntable.flatten(table.column_to_be_flattened, original_id = table.id).\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | pet | age\n1 | Dog | 2\n7 | Cat | 5\n''')\nt2 = t1.flatten(t1.pet)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\npet\nC\nD\na\ng\no\nt\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt3 = t1.flatten(t1.pet, t1.age)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\npet | age\nC | 5\nD | 2\na | 5\ng | 2\no | 2\nt | 5\n```\n::\n::\nfrom_columns(kwargs)\nBuild a table from columns.\nAll columns must have the same ids. 
Columns\u2019 names must be pairwise distinct.\n* Parameters\n * args (`ColumnReference`) \u2013 List of columns.\n * kwargs (`ColumnReference`) \u2013 Columns with their new names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.Table.empty(age=float, pet=float)\nt2 = pw.Table.empty(foo=float, bar=float).with_universe_of(t1)\nt3 = pw.Table.from_columns(t1.pet, qux=t2.foo)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\npet | qux\n```\n::\n::\ngroupby(*args, id=None, sort_by=None, _filter_out_results_of_forgetting=False)\nGroups table by columns from args.\nNOTE: Usually followed by .reduce() that aggregates the result and returns a table.\n* Parameters\n * args (`ColumnReference`) \u2013 columns to group by.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 if provided, is the column used to set id\u2019s of the rows of the result\n* Returns\n *GroupedTable* \u2013 Groupby object.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.groupby(t1.pet, t1.owner).reduce(t1.owner, t1.pet, ageagg=pw.reducers.sum(t1.age))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | pet | 
ageagg\nAlice | cat | 8\nAlice | dog | 10\nBob | dog | 16\n```\n::\n::\nhaving(*indexers)\nRemoves rows so that indexed.ix(indexer) is possible when some rows are missing,\nfor each indexer in indexers\nproperty id(: ColumnReference )\nGet reference to pseudocolumn containing id\u2019s of a table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.select(ids = t1.id)\nt2.typehints()['ids']\n```\n::\nResult\n```\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\npw.debug.compute_and_print(t2.select(test=t2.id == t2.ids), include_id=False)\n```\n::\nResult\n```\ntest\nTrue\nTrue\nTrue\nTrue\n```\n::\n::\ninterpolate(timestamp, *values, mode=InterpolateMode.LINEAR)\nInterpolates missing values in a column using the previous and next values based on a timestamps column.\n* Parameters\n * timestamp (*ColumnReference*) \u2013 Reference to the column containing timestamps.\n * \\*values (*ColumnReference*) \u2013 References to the columns containing values to be interpolated.\n * mode (*InterpolateMode, optional*) \u2013 The interpolation mode. Currently, only InterpolateMode.LINEAR is supported. 
Default is InterpolateMode.LINEAR.\n* Returns\n *Table* \u2013 A new table with the interpolated values.\n* Raises\n ValueError \u2013 If the columns are not ColumnReference or if the interpolation mode is not supported.\nNOTE: * The interpolation is performed based on linear interpolation between the previous and next values.\n* If a value is missing at the beginning or end of the column, no interpolation is performed.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\ntimestamp | values_a | values_b\n1 | 1 | 10\n2 | |\n3 | 3 |\n4 | |\n5 | |\n6 | 6 | 60\n''')\ntable = table.interpolate(pw.this.timestamp, pw.this.values_a, pw.this.values_b)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ntimestamp | values_a | values_b\n1 | 1 | 10\n2 | 2.0 | 20.0\n3 | 3 | 30.0\n4 | 4.0 | 40.0\n5 | 5.0 | 50.0\n6 | 6 | 60\n```\n::\n::\nintersect(*tables)\nRestrict self universe to keys appearing in all of the tables.\n* Parameters\n tables (`Table`) \u2013 tables keys of which are used to restrict universe.\n* Returns\n *Table* \u2013 table with restricted universe, with the same set of columns\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 
= pw.debug.table_from_markdown('''\n | cost\n2 | 100\n3 | 200\n4 | 300\n''')\nt3 = t1.intersect(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n```\n::\n::\ninterval_join(other, self_time, other_time, interval, *on, behavior=None, how=JoinMode.INNER)\nPerforms an interval join of self with other using a time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n * how (`JoinMode`) \u2013 decides whether to run interval_join_inner, interval_join_left, interval_join_right\n or interval_join_outer. Default is INNER.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b, how=pw.JoinMode.INNER\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\ninterval_join_inner(other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval join of self with other using a 
time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_inner(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around 
work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_inner(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\ninterval_join_left(other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval left join of self with other using a time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows from the left\nside that haven\u2019t been matched with the right side are returned with missing\nvalues on the right side replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_left(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n11 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_left(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, 
right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n1 | 11 |\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n3 | 4 |\n```\n::\n::\ninterval_join_outer(other, self_time, other_time, interval, *on, behavior=None)\nPerforms an interval outer join of self with other using a time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows that haven\u2019t\nbeen matched with the other side are returned with missing values on the other\nside replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_outer(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 0\n | 7\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n11 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_outer(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n | | 0\n | | 2\n | | 7\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n1 | 11 |\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n3 | 4 |\n```\n::\n::\ninterval_join_right(other, self_time, other_time, interval, *on, 
behavior=None)\nPerforms an interval right join of self with other using a time difference\nand join expressions. If self_time + lower_bound <=\nother_time <= self_time + upper_bound\nand conditions in on are satisfied, the rows are joined. Rows from the right\nside that haven\u2019t been matched with the left side are returned with missing\nvalues on the left side replaced with None.\n* Parameters\n * other (`Table`) \u2013 the right side of the join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * lower_bound \u2013 a lower bound on time difference between other_time\n and self_time.\n * upper_bound \u2013 an upper bound on time difference between other_time\n and self_time.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * behavior (`Optional`\\[`CommonBehavior`\\]) \u2013 defines temporal behavior of a join - features like delaying entries\n or ignoring late entries.\n* Returns\n *IntervalJoinResult* \u2013 a result of the interval join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 3\n 2 | 4\n 3 | 5\n 4 | 11\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 0\n 2 | 1\n 3 | 4\n 4 | 7\n'''\n)\nt3 = t1.interval_join_right(t2, t1.t, t2.t, pw.temporal.interval(-2, 1)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 0\n | 7\n3 | 1\n3 | 4\n4 | 4\n5 | 4\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 3\n 2 | 1 | 4\n 3 | 1 | 5\n 4 | 1 | 11\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 0\n 2 | 1 | 1\n 3 | 1 | 4\n 4 | 1 | 7\n 5 | 2 | 0\n 6 | 2 | 2\n 7 | 4 | 2\n'''\n)\nt3 = t1.interval_join_right(\n t2, t1.t, t2.t, pw.temporal.interval(-2, 1), t1.a == t2.b\n).select(t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\na | left_t | right_t\n | | 0\n | | 2\n | | 7\n1 | 3 | 1\n1 | 3 | 4\n1 | 4 | 4\n1 | 5 | 4\n2 | 2 | 0\n2 | 2 | 2\n2 | 3 | 2\n```\n::\n::\nix(expression, *, optional=False, context=None)\nReindexes the table using expression values as keys. 
Uses keys from context, or tries to infer\nproper context from the expression.\nIf optional is True, then None in expression values result in None values in the result columns.\nMissing values in table keys result in RuntimeError.\nContext can be anything that allows for select or reduce, or pathway.this construct\n(latter results in returning a delayed operation, and should be only used when using ix inside\njoin().select() or groupby().reduce() sequence).\n* Returns\n Reindexed table with the same set of columns.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt_animals = pw.debug.table_from_markdown('''\n | epithet | genus\n1 | upupa | epops\n2 | acherontia | atropos\n3 | bubo | scandiacus\n4 | dynastes | hercules\n''')\nt_birds = pw.debug.table_from_markdown('''\n | desc\n2 | hoopoe\n4 | owl\n''')\nret = t_birds.select(t_birds.desc, latin=t_animals.ix(t_birds.id).genus)\npw.debug.compute_and_print(ret, include_id=False)\n```\n::\nResult\n```\ndesc | latin\nhoopoe | atropos\nowl | hercules\n```\n::\n::\nix_ref(*args, optional=False, context=None)\nReindexes the table using expressions as primary keys.\nUses keys from context, or tries to infer proper context from the expression.\nIf optional is True, then None in expression values result in None values in the result columns.\nMissing values in table keys result in RuntimeError.\nContext can be anything that allows for select or reduce, or pathway.this construct\n(latter results in returning a delayed operation, and should be only used when using ix inside\njoin().select() or groupby().reduce() sequence).\n* Parameters\n args (`Union`\\[`ColumnExpression`, `None`, `int`, `float`, `str`, 
`bytes`, `bool`, `Pointer`, `datetime`, `timedelta`, `ndarray`, `Json`, `dict`\\[`str`, `Any`\\], `tuple`\\[`Any`, `...`\\]\\]) \u2013 Column references.\n* Returns\n *Row* \u2013 indexed row.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nname | pet\nAlice | dog\nBob | cat\nCarole | cat\nDavid | dog\n''')\nt2 = t1.with_id_from(pw.this.name)\nt2 = t2.select(*pw.this, new_value=pw.this.ix_ref(\"Alice\").pet)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nname | pet | new_value\nAlice | dog | dog\nBob | cat | dog\nCarole | cat | dog\nDavid | dog | dog\n```\n::\n::\nTables obtained by a groupby/reduce scheme always have primary keys:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nname | pet\nAlice | dog\nBob | cat\nCarole | cat\nDavid | cat\n''')\nt2 = t1.groupby(pw.this.pet).reduce(pw.this.pet, count=pw.reducers.count())\nt3 = t1.select(*pw.this, new_value=t2.ix_ref(t1.pet).count)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nname | pet | new_value\nAlice | dog | 1\nBob | cat | 3\nCarole | cat | 3\nDavid | cat | 3\n```\n::\n::\nSingle-row tables can be accessed via ix_ref():\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming 
framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nname | pet\nAlice | dog\nBob | cat\nCarole | cat\nDavid | cat\n''')\nt2 = t1.reduce(count=pw.reducers.count())\nt3 = t1.select(*pw.this, new_value=t2.ix_ref(context=t1).count)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nname | pet | new_value\nAlice | dog | 4\nBob | cat | 4\nCarole | cat | 4\nDavid | cat | 4\n```\n::\n::\njoin(other, *on, id=None, how=JoinMode.INNER)\nJoin self with other using the given join expression.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n * how (`JoinMode`) \u2013 by default, inner join is performed. 
Possible values are JoinMode.{INNER,LEFT,RIGHT,OUTER}\n correspond to inner, left, right and outer join respectively.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(\n t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER\n).select(age=t1.age, owner_name=t2.owner, size=t2.size)\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_inner(other, *on, id=None)\nInner-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER).select(age=t1.age, owner_name=t2.owner, size=t2.size) # noqa: E501\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_left(other, *on, id=None)\nLeft-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks:\nargs cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on the right are replaced with None\n- rows from the right side that were not matched with the left side are skipped\n- for 
rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_left(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_outer(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on the right are replaced with None\n- for rows from the right side that were not matched with the left side,\nmissing values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called 
to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_outer(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t1.id, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_right(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- rows from the left side that were not matched with the right side are skipped\n- for rows from the right side that were not matched with the left side,\nmissing values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway 
programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_right(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(pw.coalesce(t1.b,0) + t2.d,t1.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n```\n::\n::\n* Returns\n OuterJoinResult object\npointer_from(*args, optional=False)\nPseudo-random hash of its argument. Produces pointer types. Applied column-wise.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob dog''')\ng = t1.groupby(t1.owner).reduce(refcol = t1.pointer_from(t1.owner)) # g.id == g.refcol\npw.debug.compute_and_print(g.select(test = (g.id == g.refcol)), include_id=False)\n```\n::\nResult\n```\ntest\nTrue\nTrue\n```\n::\n::\npromise_universe_is_equal_to(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the others.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n None\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: 
true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nimport pytest\nt1 = pw.debug.table_from_markdown(\n '''\n | age | owner | pet\n1 | 8 | Alice | cat\n2 | 9 | Bob | dog\n3 | 15 | Alice | tortoise\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | age | owner\n1 | 11 | Alice\n2 | 12 | Tom\n3 | 7 | Eve\n'''\n)\nt3 = t2.filter(pw.this.age > 10)\nwith pytest.raises(\n ValueError,\n match='Universe of the argument of Table.update_cells\\(\\) needs ' # noqa\n + 'to be a subset of the universe of the updated table.',\n):\n t1.update_cells(t3)\nt1 = t1.promise_universe_is_equal_to(t2)\nresult = t1.update_cells(t3)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n11 | Alice | cat\n12 | Tom | dog\n15 | Alice | tortoise\n```\n::\n::\npromise_universe_is_subset_of(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the other.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 30\n''').promise_universe_is_subset_of(t1)\nt3 = t1 << t2\npw.debug.compute_and_print(t3, 
include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 30\n```\n::\n::\npromise_universes_are_disjoint(other)\nAsserts to Pathway that an universe of self is disjoint from universe of other.\nSemantics: Used in situations where Pathway cannot deduce universes are disjoint.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n11 | 11 | Alice | 30\n12 | 12 | Tom | 40\n''').promise_universes_are_disjoint(t1)\nt3 = t1.concat(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n11 | Alice | 30\n12 | Tom | 40\n```\n::\n::\nreduce(*args, kwargs)\nReduce a table to a single row.\nEquivalent to self.groupby().reduce(\\*args, \\*\\*kwargs).\n* Parameters\n * args (`ColumnReference`) \u2013 reducer to reduce the table with\n * kwargs (`ColumnExpression`) \u2013 reducer to reduce the table with. 
Its key is the new name of a column.\n* Returns\n *Table* \u2013 Reduced table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.reduce(ageagg=pw.reducers.argmin(t1.age))\npw.debug.compute_and_print(t2, include_id=False) \n```\n::\nResult\n```\nageagg\n^...\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt3 = t2.select(t1.ix(t2.ageagg).age, t1.ix(t2.ageagg).pet)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | pet\n7 | dog\n```\n::\n::\nrename(names_mapping=None, kwargs)\nRename columns according either a dictionary or kwargs.\nIf a mapping is provided using a dictionary, `rename_by_dict` will be used.\nOtherwise, `rename_columns` will be used with kwargs.\nColumns not in keys(kwargs) are not changed. New name of a column must not be `id`.\n* Parameters\n * names_mapping (`Optional`\\`dict`\\[`str` | [`ColumnReference`, `str`\\]\\]) \u2013 mapping from old column names to new names.\n * kwargs (`ColumnExpression`) \u2013 mapping from old column names to new names.\n* Returns\n *Table* \u2013 self with columns renamed.\nrename_by_dict(names_mapping)\nRename columns according to a dictionary.\nColumns not in keys(kwargs) are not changed. 
New name of a column must not be id.\n* Parameters\n names_mapping (`dict`\\`str` | [`ColumnReference`, `str`\\]) \u2013 mapping from old column names to new names.\n* Returns\n *Table* \u2013 self with columns renamed.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | 1\n9 | Bob | 1\n8 | Alice | 2\n''')\nt2 = t1.rename_by_dict({\"age\": \"years_old\", t1.pet: \"animal\"})\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | years_old | animal\nAlice | 8 | 2\nAlice | 10 | 1\nBob | 9 | 1\n```\n::\n::\nrename_columns(kwargs)\nRename columns according to kwargs.\nColumns not in keys(kwargs) are not changed. 
New name of a column must not be id.\n* Parameters\n kwargs (`str` | `ColumnReference`) \u2013 mapping from old column names to new names.\n* Returns\n *Table* \u2013 self with columns renamed.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | 1\n9 | Bob | 1\n8 | Alice | 2\n''')\nt2 = t1.rename_columns(years_old=t1.age, animal=t1.pet)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | years_old | animal\nAlice | 8 | 2\nAlice | 10 | 1\nBob | 9 | 1\n```\n::\n::\nrestrict(other)\nRestrict self universe to keys appearing in other.\n* Parameters\n other (`TableLike`) \u2013 table which universe is used to restrict universe of self.\n* Returns\n *Table* \u2013 table with restricted universe, with the same set of columns\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | cost\n2 | 100\n3 | 200\n'''\n)\nt2.promise_universe_is_subset_of(t1)\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page 
contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt3 = t1.restrict(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n```\n::\n::\nproperty schema(: type[pathway.internals.schema.Schema] )\nGet schema of the table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt1.schema\n```\n::\nResult\n```\n, 'owner': , 'pet': }>\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1.typehints()['age']\n```\n::\nResult\n```\n```\n::\n::\nselect(*args, kwargs)\nBuild a new table with columns specified by kwargs.\nOutput columns\u2019 names are keys(kwargs). values(kwargs) can be raw values, boxed\nvalues, columns. 
Assigning to id reindexes the table.\n* Parameters\n * args (`ColumnReference`) \u2013 Column references.\n * kwargs (`Any`) \u2013 Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\npet\nDog\nCat\n''')\nt2 = t1.select(animal=t1.pet, desc=\"fluffy\")\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nanimal | desc\nCat | fluffy\nDog | fluffy\n```\n::\n::\nproperty slice(: TableSlice )\nCreates a collection of references to self columns.\nSupports basic column manipulation methods.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt1.slice.without(\"age\")\n```\n::\nResult\n```\nTableSlice({'owner': .owner, 'pet': .pet})\n```\n::\n::\nsort(key, instance=None)\nSorts a table by the specified keys.\n* Parameters\n * table \u2013 pw.Table\n The table to be sorted.\n * key (`ColumnExpression`) \u2013 ColumnReference\n An expression to sort by.\n * instance (`Optional`\\[`ColumnExpression`\\]) \u2013 ColumnReference or None\n An expression with instance. 
Rows are sorted within an instance.\n `prev` and `next` columns will only point to rows that have the same instance.\n* Returns\n *pw.Table* \u2013 The sorted table. Contains two columns: `prev` and `next`, containing the pointers\n to the previous and next rows.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\nname | age | score\nAlice | 25 | 80\nBob | 20 | 90\nCharlie | 30 | 80\n''')\ntable = table.with_id_from(pw.this.name)\ntable += table.sort(key=pw.this.age)\npw.debug.compute_and_print(table, include_id=True)\n```\n::\nResult\n```\n | name | age | score | prev | next\n^GBSDEEW... | Alice | 25 | 80 | ^EDPSSB1... | ^DS9AT95...\n^EDPSSB1... | Bob | 20 | 90 | | ^GBSDEEW...\n^DS9AT95... | Charlie | 30 | 80 | ^GBSDEEW... |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\ntable = pw.debug.table_from_markdown('''\nname | age | score\nAlice | 25 | 80\nBob | 20 | 90\nCharlie | 30 | 80\nDavid | 35 | 90\nEve | 15 | 80\n''')\ntable = table.with_id_from(pw.this.name)\ntable += table.sort(key=pw.this.age, instance=pw.this.score)\npw.debug.compute_and_print(table, include_id=True)\n```\n::\nResult\n```\n | name | age | score | prev | next\n^GBSDEEW... | Alice | 25 | 80 | ^T0B95XH... | ^DS9AT95...\n^EDPSSB1... | Bob | 20 | 90 | | ^RT0AZWX...\n^DS9AT95... | Charlie | 30 | 80 | ^GBSDEEW... |\n^RT0AZWX... 
| David | 35 | 90 | ^EDPSSB1... |\n^T0B95XH... | Eve | 15 | 80 | | ^GBSDEEW...\n```\n::\n::\ntypehints()\nReturn the types of the columns as a dictionary.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt1.typehints()\n```\n::\nResult\n```\nmappingproxy({'age': <class 'int'>, 'owner': <class 'str'>, 'pet': <class 'str'>})\n```\n::\n::\nupdate_cells(other)\nUpdates cells of self, breaking ties in favor of the values in other.\nSemantics:\n * result.columns == self.columns\n * result.id == self.id\n * conflicts are resolved preferring other\u2019s values\nRequires:\n * other.columns \u2286 self.columns\n * other.id \u2286 self.id\n* Parameters\n other (`Table`) \u2013 the other table.\n* Returns\n *Table* \u2013 self updated with cells from other.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n age | owner | pet\n1 | 10 | Alice | 30\n''')\npw.universes.promise_is_subset_of(t2, t1)\nt3 = t1.update_cells(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice |
30\n```\n::\n::\nupdate_rows(other)\nUpdates rows of self, breaking ties in favor of the rows in other.\nSemantics:\n- result.columns == self.columns == other.columns\n- result.id == self.id \u222a other.id\nRequires:\n- other.columns == self.columns\n* Parameters\n other (`Table`\\[`TypeVar`(`TSchema`, bound= `Schema`)\\]) \u2013 the other table.\n* Returns\n *Table* \u2013 self updated with rows from other.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 30\n12 | 12 | Tom | 40\n''')\nt3 = t1.update_rows(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 30\n12 | Tom | 40\n```\n::\n::\nupdate_types(**kwargs)\nUpdates types in schema. Has no effect on the runtime.\nwindow_join(other, self_time, other_time, window, *on, how=JoinMode.INNER)\nPerforms a window join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join.
Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * how (`JoinMode`) \u2013 decides whether to run window_join_inner, window_join_left, window_join_right\n or window_join_outer. Default is INNER.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with 
data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt4 = t1.window_join(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join(\n t2, t1.t, t2.t, 
pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n```\n::\n::\nwindow_join_inner(other, self_time, other_time, window, *on)\nPerforms a window join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. 
Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named 
columns over identical universes.\nExample:\n```python\nt4 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_inner(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_inner(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, 
include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_inner(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n```\n::\n::\nwindow_join_left(other, self_time, other_time, window, *on)\nPerforms a window left join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. 
Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from the left side that didn\u2019t match with any record on the right side in\na given window, are returned with missing values on the right side replaced\nwith None. The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. 
A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_left(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 |\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt4 = t1.window_join_left(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n1 |\n1 | 2\n2 | 2\n2 | 2\n3 |\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n13 |\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 
13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_left(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t1.a, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 |\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n1 | 13 |\n2 | 1 |\n2 | 2 | 2\n2 | 2 | 3\n3 | 4 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_left(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n10 |\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 
| 4 | 3\n'''\n)\nt3 = t1.window_join_left(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t1.a, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n2 | 7 |\n3 | 4 |\n```\n::\n::\nwindow_join_outer(other, self_time, other_time, window, *on)\nPerforms a window outer join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from both sides that didn\u2019t match with any record on the other side in\na given window, are returned with missing values on the other side replaced\nwith None. The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n1 |\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt4 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n | 5\n | 6\n1 |\n1 | 2\n2 | 2\n2 | 2\n3 |\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n13 |\n13 |\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, 
context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_outer(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=pw.coalesce(t1.a, t2.b), left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | | 5\n1 | 1 |\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n1 | 13 |\n2 | 1 |\n2 | 2 | 2\n2 | 2 | 3\n3 | 4 |\n4 | | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_outer(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | -3\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n10 |\n15 | 16\n17 | 16\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = 
pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_outer(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=pw.coalesce(t1.a, t2.b), left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | | 10\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n2 | 7 |\n3 | 4 |\n4 | | 3\n```\n::\n::\nwindow_join_right(other, self_time, other_time, window, *on)\nPerforms a window right join of self with other using a window and join expressions.\nIf two records belong to the same window and meet the conditions specified in\nthe on clause, they will be joined. Note that if a sliding window is used and\nthere are pairs of matching records that appear in more than one window,\nthey will be included in the result multiple times (equal to the number of\nwindows they appear in).\nWhen using a session window, the function creates sessions by concatenating\nrecords from both sides of a join. Only pairs of records that meet\nthe conditions specified in the on clause can be part of the same session.\nThe result of a given session will include all records from the left side of\na join that belong to this session, joined with all records from the right\nside of a join that belong to this session.\nRows from the right side that didn\u2019t match with any record on the left side in\na given window, are returned with missing values on the left side replaced\nwith None. 
The multiplicity of such rows equals the number of windows they\nbelong to and don\u2019t have a match in them.\n* Parameters\n * other (`Table`) \u2013 the right side of a join.\n * self_time (`ColumnExpression`) \u2013 time expression in self.\n * other_time (`ColumnExpression`) \u2013 time expression in other.\n * window (`Window`) \u2013 a window to use.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == on the top level\n operation and be of the form LHS: ColumnReference == RHS: ColumnReference.\n* Returns\n *WindowJoinResult* \u2013 a result of the window join. A method .select()\n can be called on it to extract relevant columns from the result of a join.\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 1\n 2 | 2\n 3 | 3\n 4 | 7\n 5 | 13\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 1 | 2\n 2 | 5\n 3 | 6\n 4 | 7\n'''\n)\nt3 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.tumbling(2)).select(\n left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt4 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.sliding(1, 2)).select(\n left_t=t1.t, 
right_t=t2.t\n)\npw.debug.compute_and_print(t4, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | 5\n | 5\n | 6\n1 | 2\n2 | 2\n2 | 2\n3 | 2\n7 | 6\n7 | 7\n7 | 7\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 2\n 3 | 1 | 3\n 4 | 1 | 7\n 5 | 1 | 13\n 6 | 2 | 1\n 7 | 2 | 2\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | 2\n 2 | 1 | 5\n 3 | 1 | 6\n 4 | 1 | 7\n 5 | 2 | 2\n 6 | 2 | 3\n 7 | 4 | 3\n'''\n)\nt3 = t1.window_join_right(t2, t1.t, t2.t, pw.temporal.tumbling(2), t1.a == t2.b).select(\n key=t2.b, left_t=t1.t, right_t=t2.t\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | | 5\n1 | 2 | 2\n1 | 3 | 2\n1 | 7 | 6\n1 | 7 | 7\n2 | 2 | 2\n2 | 2 | 3\n4 | | 3\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | 0\n 1 | 5\n 2 | 10\n 3 | 15\n 4 | 17\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | t\n 0 | -3\n 1 | 2\n 2 | 3\n 3 | 6\n 4 | 16\n'''\n)\nt3 = t1.window_join_right(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2)\n).select(left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nleft_t | right_t\n | -3\n0 | 2\n0 | 3\n0 | 6\n5 | 2\n5 | 3\n5 | 6\n15 | 16\n17 
| 16\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt1 = pw.debug.table_from_markdown(\n '''\n | a | t\n 1 | 1 | 1\n 2 | 1 | 4\n 3 | 1 | 7\n 4 | 2 | 0\n 5 | 2 | 3\n 6 | 2 | 4\n 7 | 2 | 7\n 8 | 3 | 4\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | b | t\n 1 | 1 | -1\n 2 | 1 | 6\n 3 | 2 | 2\n 4 | 2 | 10\n 5 | 4 | 3\n'''\n)\nt3 = t1.window_join_right(\n t2, t1.t, t2.t, pw.temporal.session(predicate=lambda a, b: abs(a - b) <= 2), t1.a == t2.b\n).select(key=t2.b, left_t=t1.t, right_t=t2.t)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nkey | left_t | right_t\n1 | 1 | -1\n1 | 4 | 6\n1 | 7 | 6\n2 | | 10\n2 | 0 | 2\n2 | 3 | 2\n2 | 4 | 2\n4 | | 3\n```\n::\n::\nwindowby(time_expr, *, window, behavior=None, shard=None)\nCreate a GroupedTable by windowing the table (based on expr and window),\noptionally sharded with shard\n* Parameters\n * time_expr (`ColumnExpression`) \u2013 Column expression used for windowing\n * window (`Window`) \u2013 type window to use\n * shard (`Optional`\\[`ColumnExpression`\\]) \u2013 optional column expression to act as a shard key\nExamples:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n'''\n | shard | t | v\n1 | 0 | 1 | 10\n2 | 0 | 2 | 1\n3 | 0 | 4 | 3\n4 | 0 | 8 | 2\n5 | 0 | 9 | 4\n6 | 0 | 10| 8\n7 | 1 | 1 | 9\n8 | 1 | 2 | 16\n''')\nresult = 
t.windowby(\n t.t, window=pw.temporal.session(predicate=lambda a, b: abs(a-b) <= 1), shard=t.shard\n).reduce(\npw.this.shard,\nmin_t=pw.reducers.min(pw.this.t),\nmax_v=pw.reducers.max(pw.this.v),\ncount=pw.reducers.count(),\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nshard | min_t | max_v | count\n0 | 1 | 10 | 2\n0 | 4 | 3 | 1\n0 | 8 | 8 | 3\n1 | 1 | 16 | 2\n```\n::\n::\nwith_columns(*args, kwargs)\nUpdates columns of self, according to args and kwargs.\nSee table.select specification for evaluation of args and kwargs.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | owner | pet | size\n1 | Tom | 1 | 10\n2 | Bob | 1 | 9\n3 | Tom | 2 | 8\n''').with_universe_of(t1)\nt3 = t1.with_columns(*t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet | size\n8 | Tom | 2 | 8\n9 | Bob | 1 | 9\n10 | Tom | 1 | 10\n```\n::\n::\nwith_id(new_index)\nSet new ids based on another column containing id-typed values.\nTo generate ids based on arbitrary valued columns, use with_id_from.\nValues assigned must be row-wise unique.\n* Parameters\n new_id \u2013 column to be used as the new index.\n* Returns\n Table with updated ids.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named 
columns over identical universes.\nExample:\n```python\nimport pytest; pytest.xfail(\"with_id is hard to test\")\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | new_id\n1 | 2\n2 | 3\n3 | 4\n''')\nt3 = t1.promise_universe_is_subset_of(t2).with_id(t2.new_id)\npw.debug.compute_and_print(t3)\n```\n::\nResult\n```\n age owner pet\n^2 10 Alice 1\n^3 9 Bob 1\n^4 8 Alice 2\n```\n::\n::\nwith_id_from(*args)\nCompute new ids based on values in columns.\nIds computed from columns must be row-wise unique.\n* Parameters\n columns \u2013 columns to be used as primary keys.\n* Returns\n *Table* \u2013 self updated with recomputed ids.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n 1 | 10 | Alice | 1\n 2 | 9 | Bob | 1\n 3 | 8 | Alice | 2\n''')\nt2 = t1 + t1.select(old_id=t1.id)\nt3 = t2.with_id_from(t2.age)\npw.debug.compute_and_print(t3) \n```\n::\nResult\n```\n | age | owner | pet | old_id\n^... | 8 | Alice | 2 | ^...\n^... | 9 | Bob | 1 | ^...\n^... 
| 10 | Alice | 1 | ^...\n```\n::\n::\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nt4 = t3.select(t3.age, t3.owner, t3.pet, same_as_old=(t3.id == t3.old_id),\n same_as_new=(t3.id == t3.pointer_from(t3.age)))\npw.debug.compute_and_print(t4) \n```\n::\nResult\n```\n | age | owner | pet | same_as_old | same_as_new\n^... | 8 | Alice | 2 | False | True\n^... | 9 | Bob | 1 | False | True\n^... | 10 | Alice | 1 | False | True\n```\n::\n::\nwith_prefix(prefix)\nRename columns by adding prefix to each name of column.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | 1\n9 | Bob | 1\n8 | Alice | 2\n''')\nt2 = t1.with_prefix(\"u_\")\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nu_age | u_owner | u_pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n```\n::\n::\nwith_suffix(suffix)\nRename columns by adding suffix to each name of column.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | 
pet\n10 | Alice | 1\n9 | Bob | 1\n8 | Alice | 2\n''')\nt2 = t1.with_suffix(\"_current\")\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nage_current | owner_current | pet_current\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n```\n::\n::\nwith_universe_of(other)\nReturns a copy of self with exactly the same universe as others.\nSemantics: Required precondition self.universe == other.universe\nUsed in situations where Pathway cannot deduce equality of universes, but\nthose are equal as verified during runtime.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | pet\n1 | Dog\n7 | Cat\n''')\nt2 = pw.debug.table_from_markdown('''\n | age\n1 | 10\n7 | 3\n''').with_universe_of(t1)\nt3 = t1 + t2\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\npet | age\nCat | 3\nDog | 10\n```\n::\n::\nwithout(*columns)\nSelects all columns without named column references.\n* Parameters\n columns (`str` | `ColumnReference`) \u2013 columns to be dropped provided by table.column_name notation.\n* Returns\n *Table* \u2013 self without specified columns.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = t1.without(t1.age, 
pw.this.pet)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner\nAlice\nAlice\nBob\n```\n::\n::\nclass pw.TableLike(context)\nInterface class for table-likes: Table, GroupedTable and JoinResult.\nAll of those contain universe info, and thus support universe-related asserts.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\ng1 = t1.groupby(t1.owner)\nt2 = t1.filter(t1.age >= 9)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n9 | Bob | dog\n10 | Alice | dog\n```\n::\n::\n```python\ng2 = t2.groupby(t2.owner)\npw.universes.promise_is_subset_of(g2, g1) # t2 is a subset of t1, so this is safe\n```\npromise_universe_is_equal_to(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the others.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n None\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nimport pytest\nt1 = pw.debug.table_from_markdown(\n '''\n | age | owner | pet\n1 | 8 | Alice | cat\n2 | 9 | Bob | dog\n3 | 15 | Alice | tortoise\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | age | 
owner\n1 | 11 | Alice\n2 | 12 | Tom\n3 | 7 | Eve\n'''\n)\nt3 = t2.filter(pw.this.age > 10)\nwith pytest.raises(\n ValueError,\n match='Universe of the argument of Table.update_cells\\(\\) needs ' # noqa\n + 'to be a subset of the universe of the updated table.',\n):\n t1.update_cells(t3)\nt1 = t1.promise_universe_is_equal_to(t2)\nresult = t1.update_cells(t3)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n11 | Alice | cat\n12 | Tom | dog\n15 | Alice | tortoise\n```\n::\n::\npromise_universe_is_subset_of(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the other.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 30\n''').promise_universe_is_subset_of(t1)\nt3 = t1 << t2\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 30\n```\n::\n::\npromise_universes_are_disjoint(other)\nAsserts to Pathway that an universe of self is disjoint from universe of other.\nSemantics: Used in situations where Pathway cannot deduce universes are disjoint.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Table API\nsidebar: 'API'\nnavigation: true\n---\n# Table API\nThe Pathway programming framework is 
organized around work with data tables.\nThis page contains reference for the Pathway Table class.\nclass pw.Table(columns, context, schema=None)\nCollection of named columns over identical universes.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n11 | 11 | Alice | 30\n12 | 12 | Tom | 40\n''').promise_universes_are_disjoint(t1)\nt3 = t1.concat(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n11 | Alice | 30\n12 | Tom | 40\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.io.redpanda package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.redpanda package\nFunctions\npw.io.redpanda.read(rdkafka_settings, topic=None, *, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None, value_columns=None, primary_key=None, types=None, default_values=None, topic_names=None)\nReads table from a set of topics in Redpanda.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of\n librdkafka.\n * topic (`UnionType`\\[`str`, `list`\\[`str`\\], `None`\\]) \u2013 Name of topic in Redpanda from which the data should be read.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the field which require such mapping, it should be\n given in the format : , where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table, required for format other than\n \u201craw\u201d. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated as uuid4. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (pw.Type) of the values of those columns. This parameter is optional, and if not\n provided the default type is pw.Type.ANY. 
\\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named data.\nFor other formats, the argument value_column is required and defines the columns.\nExample:\nConsider a simple instance of Redpanda without authentication. Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"plaintext\",\n \"group.id\": \"$GROUP_NAME\",\n \"session.timeout.ms\": \"60000\"\n}\n```\nTo connect to the topic \u201canimals\u201d and accept messages, the connector must be used as follows, depending on the format:\nRaw version:\n```python\nimport pathway as pw\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"raw\",\n)\n```\nAll the data will be accessible in the column data.\nCSV version:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"csv\",\n schema=InputSchema,\n)\n```\nIn case of CSV format, the first message must be the header:\n```csv\nowner,pet\n```\nThen, simple data rows are expected. 
For example:\n```csv\nAlice,cat\nBob,dog\n```\nThis way, you get a table which looks as follows:\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice cat\n Bob dog\n```\n::\n::\nJSON version:\n```python\nimport pathway as pw\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"json\",\n schema=InputSchema,\n)\n```\nFor the JSON connector, you can send these two messages:\n```json\n{\"owner\": \"Alice\", \"pet\": \"cat\"}\n{\"owner\": \"Bob\", \"pet\": \"dog\"}\n```\nThis way, you get a table which looks as follows:\n"} -{"doc": "---\ntitle: pathway.io.redpanda package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.redpanda package\nFunctions\npw.io.redpanda.read(rdkafka_settings, topic=None, *, schema=None, format='raw', debug_data=None, autocommit_duration_ms=1500, json_field_paths=None, parallel_readers=None, persistent_id=None, value_columns=None, primary_key=None, types=None, default_values=None, topic_names=None)\nReads table from a set of topics in Redpanda.\nThere are three formats currently supported: \u201craw\u201d, \u201ccsv\u201d, and \u201cjson\u201d.\n* Parameters\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of\n librdkafka.\n * topic (`UnionType`\\[`str`, `list`\\[`str`\\], `None`\\]) \u2013 Name of topic in Redpanda from which the data should be read.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * format \u2013 format of the input data, \u201craw\u201d, \u201ccsv\u201d, or \u201cjson\u201d\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * json_field_paths (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 If the format is JSON, this field allows to map field names\n into path in the field. For the field which require such mapping, it should be\n given in the format : , where the path to\n be mapped needs to be a\n JSON Pointer (RFC 6901).\n * parallel_readers (`Optional`\\[`int`\\]) \u2013 number of copies of the reader to work in parallel. In case\n the number is not specified, min{pathway_threads, total number of partitions}\n will be taken. This number also can\u2019t be greater than the number of Pathway\n engine threads, and will be reduced to the number of engine threads, if it\n exceeds.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Columns to extract for a table, required for format other than\n \u201craw\u201d. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated as uuid4. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (pw.Type) of the values of those columns. This parameter is optional, and if not\n provided the default type is pw.Type.ANY. 
\\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nWhen using the format \u201craw\u201d, the connector will produce a single-column table:\nall the data is saved into a column named data.\nFor other formats, the argument value_column is required and defines the columns.\nExample:\nConsider a simple instance of Redpanda without authentication. Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"plaintext\",\n \"group.id\": \"$GROUP_NAME\",\n \"session.timeout.ms\": \"60000\"\n}\n```\nTo connect to the topic \u201canimals\u201d and accept messages, the connector must be used as follows, depending on the format:\nRaw version:\n```python\nimport pathway as pw\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"raw\",\n)\n```\nAll the data will be accessible in the column data.\nCSV version:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"csv\",\n schema=InputSchema,\n)\n```\nIn case of CSV format, the first message must be the header:\n```csv\nowner,pet\n```\nThen, simple data rows are expected. For example:\n```csv\nAlice,cat\nBob,dog\n```\nThis way, you get a table which looks as follows:\n```python\npw.debug.compute_and_print(t, include_id=False) \n```\n::\nResult\n```\nowner pet\nAlice cat\n Bob dog\n```\n::\n::\nNow consider that the data about pets come in a more sophisticated way. 
For instance\nyou have an owner, kind and name of an animal, along with some physical measurements.\nThe JSON payload in this case may look as follows:\n```json\n{\n \"name\": \"Jack\",\n \"pet\": {\n \"animal\": \"cat\",\n \"name\": \"Bob\",\n \"measurements\": [100, 200, 300]\n }\n}\n```\nSuppose you need to extract a name of the pet and the height, which is the 2nd\n(1-based) or the 1st (0-based) element in the array of measurements. Then, you\nuse JSON Pointer and do a connector, which gets the data as follows:\n```python\nimport pathway as pw\nclass InputSchema(pw.Schema):\n pet_name: str\n pet_height: int\nt = pw.io.redpanda.read(\n rdkafka_settings,\n topic=\"animals\",\n format=\"json\",\n schema=InputSchema,\n json_field_paths={\n \"pet_name\": \"/pet/name\",\n \"pet_height\": \"/pet/measurements/1\"\n },\n)\n```\npw.io.redpanda.write(table, rdkafka_settings, topic_name, *, format='json', kwargs)\nWrite a table to a given topic on a Redpanda instance.\n* Parameters\n * table (`Table`) \u2013 the table to output.\n * rdkafka_settings (`dict`) \u2013 Connection settings in the format of librdkafka.\n * topic_name (`str`) \u2013 name of topic in Redpanda to which the data should be sent.\n * format (`str`) \u2013 format of the input data, only \u201cjson\u201d is currently supported.\n* Returns\n None\nLimitations:\nFor future proofing, the format is configurable, but (for now) only JSON is available.\nExample:\nConsider there is a queue in Redpanda, running locally on port 9092. Our queue can\nuse SASL-SSL authentication over a SCRAM-SHA-256 mechanism. You can set up a queue\nwith similar parameters in Upstash. 
Settings for rdkafka\nwill look as follows:\n```python\nimport os\nrdkafka_settings = {\n \"bootstrap.servers\": \"localhost:9092\",\n \"security.protocol\": \"sasl_ssl\",\n \"sasl.mechanism\": \"SCRAM-SHA-256\",\n \"sasl.username\": os.environ[\"KAFKA_USERNAME\"],\n \"sasl.password\": os.environ[\"KAFKA_PASSWORD\"]\n}\n```\nYou want to send a Pathway table t to the Redpanda instance.\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\"age owner pet \\n 1 10 Alice dog \\n 2 9 Bob cat \\n 3 8 Alice cat\")\n```\nTo connect to the topic \u201canimals\u201d and send messages, the connector must be used as follows, depending on the format:\nJSON version:\n```python\npw.io.redpanda.write(\n t,\n rdkafka_settings,\n \"animals\",\n format=\"json\",\n)\n```\nAll the updates of table t will be sent to the Redpanda instance.\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport datetime\nt1 = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-03-26T01:23:00\n 2 | 2023-03-27T01:23:00\n 3 | 2023-10-29T01:23:00\n 4 | 
2023-10-30T01:23:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(date=pw.this.date.dt.strptime(fmt=fmt))\nt3 = t2.with_columns(\n new_date=pw.this.date.dt.add_duration_in_timezone(\n datetime.timedelta(hours=2), timezone=\"Europe/Warsaw\"\n ),\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\ndate | new_date\n2023-03-26 01:23:00 | 2023-03-26 04:23:00\n2023-03-27 01:23:00 | 2023-03-27 03:23:00\n2023-10-29 01:23:00 | 2023-10-29 02:23:00\n2023-10-30 01:23:00 | 2023-10-30 03:23:00\n```\n::\n::\nday()\nExtracts day from a DateTime.\n* Returns\n Day as int. 1 <= day <= 31 (depending on a month)\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1974-03-12T00:00:00\n 2 | 2023-03-25T12:00:00\n 3 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\npw.debug.compute_and_print(table_with_days, 
include_id=False)\n```\n::\nResult\n```\nday\n12\n15\n25\n```\n::\n::\ndays()\nThe total number of days in a Duration.\n* Returns\n Days as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-03-15T00:00:00 | 2023-05-15T10:13:23\n 1 | 2023-04-15T00:00:00 | 2023-05-15T10:00:00\n 2 | 2023-05-01T10:00:00 | 2023-05-15T10:00:00\n 3 | 2023-05-15T10:00:00 | 2023-05-15T09:00:00\n 4 | 2023-05-15T10:00:00 | 2023-05-15T11:00:00\n 5 | 2023-05-16T12:13:00 | 2023-05-15T10:00:00\n 6 | 2024-05-15T14:13:23 | 2023-05-15T10:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_days = table_with_diff.select(days=pw.this[\"diff\"].dt.days())\npw.debug.compute_and_print(table_with_days, 
include_id=False)\n```\n::\nResult\n```\ndays\n-61\n-30\n-14\n0\n0\n1\n366\n```\n::\n::\nfloor(duration)\nTruncates DateTime to precision specified by duration argument.\n* Parameters\n duration (`ColumnExpression` | `Timedelta` | `str`) \u2013 truncation precision\nNOTE: Duration can be given as a string, in such case we accept aliases used\nby Pandas\nthat represent a fixed duration, so e.g. \u201cM\u201d will not be accepted.\nFor ambiguous frequencies, you can use other methods, e.g. `column.dt.month()`\ninstead of `column.dt.floor(\"1M\")`.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExamples:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport datetime\nt1 = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-05-15T12:23:12\n 2 | 2023-05-15T12:33:21\n 3 | 2023-05-15T13:20:35\n 4 | 2023-05-15T13:51:41\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(date=pw.this.date.dt.strptime(fmt=fmt))\nres = t2.with_columns(\n 
truncated_to_hours=pw.this.date.dt.floor(datetime.timedelta(hours=1)),\n truncated_to_10_min=pw.this.date.dt.floor(datetime.timedelta(minutes=10)),\n truncated_to_15_s=pw.this.date.dt.floor(datetime.timedelta(seconds=15)),\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\ndate | truncated_to_hours | truncated_to_10_min | truncated_to_15_s\n2023-05-15 12:23:12 | 2023-05-15 12:00:00 | 2023-05-15 12:20:00 | 2023-05-15 12:23:00\n2023-05-15 12:33:21 | 2023-05-15 12:00:00 | 2023-05-15 12:30:00 | 2023-05-15 12:33:15\n2023-05-15 13:20:35 | 2023-05-15 13:00:00 | 2023-05-15 13:20:00 | 2023-05-15 13:20:30\n2023-05-15 13:51:41 | 2023-05-15 13:00:00 | 2023-05-15 13:50:00 | 2023-05-15 13:51:30\n```\n::\n::\nfrom_timestamp(unit)\nConverts timestamp represented as an int to DateTime.\n* Parameters\n * timestamp \u2013 value to be converted to DateTime\n * unit (`str`) \u2013 unit of a timestamp. It has to be one of \u2018s\u2019, \u2018ms\u2019, \u2018us\u2019, \u2018ns\u2019\n* Returns\n DateTime\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called 
on\nExample:\n```python\nimport pathway as pw\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt1 = pw.debug.table_from_markdown(\n '''\n | timestamp\n1 | 10\n2 | 1685969950\n'''\n)\nt2 = t1.select(date=pw.this.timestamp.dt.from_timestamp(unit=\"s\"))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ndate\n1970-01-01 00:00:10\n2023-06-05 12:59:10\n```\n::\n::\nhour()\nExtracts hour from a DateTime.\n* Returns\n Hour as int. 0 <= hour < 24\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T00:00:00\n 2 | 2023-05-15T12:00:00\n 3 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_hours = table_with_datetime.select(hour=table_with_datetime.t1.dt.hour())\npw.debug.compute_and_print(table_with_hours, include_id=False)\n```\n::\nResult\n```\nhour\n0\n12\n14\n```\n::\n::\nhours()\nThe total number of hours in a Duration.\n* Returns\n Hours as int.\nExample:\n"} -{"doc": "---\ntitle: 
Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T00:00:00 | 2023-05-15T10:13:23\n 1 | 2023-05-15T00:00:00 | 2023-05-15T10:00:00\n 2 | 2023-05-15T10:00:00 | 2023-05-15T10:00:00\n 3 | 2023-05-15T10:00:23 | 2023-05-15T10:00:00\n 4 | 2023-05-15T12:13:00 | 2023-05-15T10:00:00\n 5 | 2023-05-15T14:13:23 | 2023-05-15T10:00:00\n 6 | 2023-05-16T10:13:23 | 2023-05-15T10:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_hours = table_with_diff.select(hours=pw.this[\"diff\"].dt.hours())\npw.debug.compute_and_print(table_with_hours, include_id=False)\n```\n::\nResult\n```\nhours\n-10\n-10\n0\n0\n2\n4\n24\n```\n::\n::\nmicrosecond()\nExtracts microseconds from a DateTime.\n* Returns\n Microsecond as int. 
0 <= microsecond < 1_000_000\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.000012000\n 3 | 2023-05-15T10:13:00.123456789\n 4 | 2023-05-15T10:13:23.123456789\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f\"))\ntable_with_microseconds = table_with_datetime.select(\n microsecond=table_with_datetime.t1.dt.microsecond()\n)\npw.debug.compute_and_print(table_with_microseconds, include_id=False)\n```\n::\nResult\n```\nmicrosecond\n0\n12\n123456\n123456\n```\n::\n::\nmicroseconds()\nThe total number of microseconds in a Duration.\n* Returns\n Microseconds as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = 
pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:23.123456789\n 1 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.000012000 | 2023-05-15T10:13:00.000000000\n 3 | 2023-05-15T10:13:00.123456789 | 2023-05-15T10:13:00.000000000\n 4 | 2023-05-15T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n 5 | 2023-05-16T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S.%f\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_microseconds = table_with_diff.select(\n microseconds=pw.this[\"diff\"].dt.microseconds()\n)\npw.debug.compute_and_print(table_with_microseconds, include_id=False)\n```\n::\nResult\n```\nmicroseconds\n-23123456\n0\n12\n123456\n23123456\n86423123456\n```\n::\n::\nmillisecond()\nExtracts milliseconds from a DateTime.\n* Returns\n Millisecond as int. 
0 <= millisecond < 1_000\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.012000000\n 3 | 2023-05-15T10:13:00.123456789\n 4 | 2023-05-15T10:13:23.123456789\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f\"))\ntable_with_milliseconds = table_with_datetime.select(\n millisecond=table_with_datetime.t1.dt.millisecond()\n)\npw.debug.compute_and_print(table_with_milliseconds, include_id=False)\n```\n::\nResult\n```\nmillisecond\n0\n12\n123\n123\n```\n::\n::\nmilliseconds()\nThe total number of milliseconds in a Duration.\n* Returns\n Milliseconds as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = 
pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:23.123456789\n 1 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.012000000 | 2023-05-15T10:13:00.000000000\n 3 | 2023-05-15T10:13:00.123456789 | 2023-05-15T10:13:00.000000000\n 4 | 2023-05-15T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n 5 | 2023-05-16T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S.%f\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_milliseconds = table_with_diff.select(\n milliseconds=pw.this[\"diff\"].dt.milliseconds()\n)\npw.debug.compute_and_print(table_with_milliseconds, include_id=False)\n```\n::\nResult\n```\nmilliseconds\n-23123\n0\n12\n123\n23123\n86423123\n```\n::\n::\nminute()\nExtracts minute from a DateTime.\n* Returns\n Minute as int. 
0 <= minute < 60\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T10:00:00\n 2 | 2023-05-15T10:00:23\n 3 | 2023-05-15T10:13:00\n 4 | 2023-05-15T10:13:23\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_minutes = table_with_datetime.select(\n minute=table_with_datetime.t1.dt.minute()\n)\npw.debug.compute_and_print(table_with_minutes, include_id=False)\n```\n::\nResult\n```\nminute\n0\n0\n13\n13\n```\n::\n::\nminutes()\nThe total number of minutes in a Duration.\n* Returns\n Minutes as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = 
table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T10:00:00 | 2023-05-15T10:13:23\n 1 | 2023-05-15T10:00:00 | 2023-05-15T10:00:00\n 2 | 2023-05-15T10:00:23 | 2023-05-15T10:00:00\n 3 | 2023-05-15T10:13:00 | 2023-05-15T10:00:00\n 4 | 2023-05-15T10:13:23 | 2023-05-15T10:00:00\n 5 | 2023-05-16T10:13:23 | 2023-05-15T10:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_minutes = table_with_diff.select(minutes=pw.this[\"diff\"].dt.minutes())\npw.debug.compute_and_print(table_with_minutes, include_id=False)\n```\n::\nResult\n```\nminutes\n-13\n0\n0\n13\n13\n1453\n```\n::\n::\nmonth()\nExtracts month from a DateTime.\n* Returns\n Month as int. 
1 <= month <= 12\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1974-03-12T00:00:00\n 2 | 2023-03-25T12:00:00\n 3 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_months = table_with_datetime.select(month=table_with_datetime.t1.dt.month())\npw.debug.compute_and_print(table_with_months, include_id=False)\n```\n::\nResult\n```\nmonth\n3\n3\n5\n```\n::\n::\nnanosecond()\nExtracts nanoseconds from a DateTime.\n* Returns\n Nanosecond as int. 
0 <= nanosecond < 1_000_000_000\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.000000012\n 3 | 2023-05-15T10:13:00.123456789\n 4 | 2023-05-15T10:13:23.123456789\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f\"))\ntable_with_nanoseconds = table_with_datetime.select(\n nanosecond=table_with_datetime.t1.dt.nanosecond()\n)\npw.debug.compute_and_print(table_with_nanoseconds, include_id=False)\n```\n::\nResult\n```\nnanosecond\n0\n12\n123456789\n123456789\n```\n::\n::\nnanoseconds()\nThe total number of nanoseconds in a Duration.\n* Returns\n Nanoseconds as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = 
pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:23.123456789\n 1 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.000000012 | 2023-05-15T10:13:00.000000000\n 3 | 2023-05-15T10:13:00.123456789 | 2023-05-15T10:13:00.000000000\n 4 | 2023-05-15T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n 5 | 2023-05-16T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S.%f\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_nanoseconds = table_with_diff.select(\n nanoseconds=pw.this[\"diff\"].dt.nanoseconds()\n)\npw.debug.compute_and_print(table_with_nanoseconds, include_id=False)\n```\n::\nResult\n```\nnanoseconds\n-23123456789\n0\n12\n123456789\n23123456789\n86423123456789\n```\n::\n::\nround(duration)\nRounds DateTime to precision specified by duration argument.\n* Parameters\n duration (`ColumnExpression` | `Timedelta` | `str`) \u2013 rounding precision\nNOTE: Duration can be given as a string, in such case we accept aliases used\nby Pandas\nthat represent a fixed duration, so e.g. 
\u201cM\u201d will not be accepted.\nFor ambiguous frequencies, you can use other methods, e.g. `column.dt.month()`\ninstead of `column.dt.floor(\"1M\")`.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExamples:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport datetime\nt1 = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-05-15T12:23:12\n 2 | 2023-05-15T12:33:21\n 3 | 2023-05-15T13:20:35\n 4 | 2023-05-15T13:51:41\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(date=pw.this.date.dt.strptime(fmt=fmt))\nres = t2.with_columns(\n rounded_to_hours=pw.this.date.dt.round(datetime.timedelta(hours=1)),\n rounded_to_10_min=pw.this.date.dt.round(datetime.timedelta(minutes=10)),\n rounded_to_15_s=pw.this.date.dt.round(datetime.timedelta(seconds=15)),\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\ndate | rounded_to_hours | rounded_to_10_min | rounded_to_15_s\n2023-05-15 12:23:12 | 2023-05-15 12:00:00 | 2023-05-15 12:20:00 | 2023-05-15 
12:23:15\n2023-05-15 12:33:21 | 2023-05-15 13:00:00 | 2023-05-15 12:30:00 | 2023-05-15 12:33:15\n2023-05-15 13:20:35 | 2023-05-15 13:00:00 | 2023-05-15 13:20:00 | 2023-05-15 13:20:30\n2023-05-15 13:51:41 | 2023-05-15 14:00:00 | 2023-05-15 13:50:00 | 2023-05-15 13:51:45\n```\n::\n::\nsecond()\nExtracts seconds from a DateTime.\n* Returns\n Second as int. 0 <= second < 60\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.123456789\n 3 | 2023-05-15T10:13:23.000000000\n 4 | 2023-05-15T10:13:23.123456789\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f\"))\ntable_with_seconds = table_with_datetime.select(\n second=table_with_datetime.t1.dt.second()\n)\npw.debug.compute_and_print(table_with_seconds, include_id=False)\n```\n::\nResult\n```\nsecond\n0\n0\n23\n23\n```\n::\n::\nseconds()\nThe total number of seconds in a Duration.\n* Returns\n Seconds as int.\nExample:\n"} -{"doc": 
"---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:23.123456789\n 1 | 2023-05-15T10:13:00.000000000 | 2023-05-15T10:13:00.000000000\n 2 | 2023-05-15T10:13:00.123456789 | 2023-05-15T10:13:00.000000000\n 3 | 2023-05-15T10:13:23.000000000 | 2023-05-15T10:13:00.000000000\n 4 | 2023-05-15T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n 5 | 2023-05-16T10:13:23.123456789 | 2023-05-15T10:13:00.000000000\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S.%f\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_seconds = table_with_diff.select(seconds=pw.this[\"diff\"].dt.seconds())\npw.debug.compute_and_print(table_with_seconds, include_id=False)\n```\n::\nResult\n```\nseconds\n-23\n0\n0\n23\n23\n86423\n```\n::\n::\nstrftime(fmt)\nConverts a DateTime to a string.\n* Parameters\n fmt (`ColumnExpression` | `str`) 
\u2013 Format string. We use the specifiers of chrono library. In most cases they are identical to standard python specifiers in strftime .\n* Returns\n str\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1970-02-03T10:13:00\n 2 | 2023-03-25T10:13:00\n 3 | 2023-03-26T12:13:00\n 4 | 2023-05-15T14:13:23\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetime = table.select(t1=pw.this.t1.dt.strptime(fmt=fmt))\ntable_formatted = table_with_datetime.select(\n date=pw.this.t1.dt.strftime(\"%d.%m.%Y\"),\n full_date=pw.this.t1.dt.strftime(\"%B %d, %Y\"),\n time_24=pw.this.t1.dt.strftime(\"%H:%M:%S\"),\n time_12=pw.this.t1.dt.strftime(\"%I:%M:%S %p\"),\n)\npw.debug.compute_and_print(table_formatted, include_id=False)\n```\n::\nResult\n```\ndate | full_date | time_24 | time_12\n03.02.1970 | February 03, 1970 | 10:13:00 | 10:13:00 AM\n15.05.2023 | May 15, 2023 | 14:13:23 | 02:13:23 PM\n25.03.2023 | March 25, 2023 | 10:13:00 | 10:13:00 AM\n26.03.2023 | March 26, 2023 | 12:13:00 | 
12:13:00 PM\n```\n::\n::\nstrptime(fmt, contains_timezone=None)\nConverts a string to a DateTime. If the string contains a timezone and\na %z specifier is used, timezone-aware DateTime is created.\nThen the timezone is converted to a server timezone (see examples).\nIf the string contains no timezone, a naive (not aware of timezone) DateTime\nis created.\n* Parameters\n fmt (`ColumnExpression` | `str`) \u2013 Format string. We use the specifiers of chrono library. In most cases they are identical to standard python specifiers in strptime . contains_timezone: If fmt is not a single string (the same for all objects) but a ColumnExpression, you need to set this parameter so that the function can determine if the return type is DateTimeNaive (contains_timezone = False) or DateTimeUtc (contains_timezone = True).\n* Returns\n DateTime\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1970-02-03T10:13:00.000000000\n 2 | 2023-03-25T10:13:00.000000012\n 3 | 
2023-03-26T12:13:00.123456789\n 4 | 2023-05-15T14:13:23.123456789\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S.%f\"\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(fmt=fmt))\npw.debug.compute_and_print(table_with_datetime, include_id=False)\n```\n::\nResult\n```\nt1\n1970-02-03 10:13:00\n2023-03-25 10:13:00.000000012\n2023-03-26 12:13:00.123456789\n2023-05-15 14:13:23.123456789\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 03.02.1970T10:13:00.000000000\n 2 | 25.03.2023T10:13:00.000000012\n 3 | 26.03.2023T12:13:00.123456789\n 4 | 15.05.2023T14:13:23.123456789\n'''\n)\nfmt = \"%d.%m.%YT%H:%M:%S.%f\"\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(fmt=fmt))\npw.debug.compute_and_print(table_with_datetime, include_id=False)\n```\n::\nResult\n```\nt1\n1970-02-03 10:13:00\n2023-03-25 10:13:00.000000012\n2023-03-26 12:13:00.123456789\n2023-05-15 14:13:23.123456789\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions 
API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1970-02-03T10:13:00-02:00\n 2 | 2023-03-25T10:13:00+00:00\n 3 | 2023-03-26T12:13:00-01:00\n 4 | 2023-05-15T14:13:23+00:30\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S%z\"\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(fmt=fmt))\npw.debug.compute_and_print(table_with_datetime, include_id=False)\n```\n::\nResult\n```\nt1\n1970-02-03 12:13:00+00:00\n2023-03-25 10:13:00+00:00\n2023-03-26 13:13:00+00:00\n2023-05-15 13:43:23+00:00\n```\n::\n::\nsubtract_date_time_in_timezone(date_time, timezone)\nSubtracts two DateTimeNaives taking into account time zone.\n* Parameters\n * date_time (`ColumnExpression` | `Timestamp`) \u2013 DateTimeNaive to be subtracted from self.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform subtraction in.\n* Returns\n Duration\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical 
use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | date1 | date2\n 1 | 2023-03-26T03:20:00 | 2023-03-26T01:20:00\n 2 | 2023-03-27T03:20:00 | 2023-03-27T01:20:00\n 3 | 2023-10-29T03:20:00 | 2023-10-29T01:20:00\n 4 | 2023-10-30T03:20:00 | 2023-10-30T01:20:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(\n date1=pw.this.date1.dt.strptime(fmt=fmt), date2=pw.this.date2.dt.strptime(fmt=fmt)\n)\nt3 = t2.with_columns(\n diff=pw.this.date1.dt.subtract_date_time_in_timezone(\n pw.this.date2, timezone=\"Europe/Warsaw\"\n ),\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\ndate1 | date2 | diff\n2023-03-26 03:20:00 | 2023-03-26 01:20:00 | 0 days 01:00:00\n2023-03-27 03:20:00 | 2023-03-27 01:20:00 | 0 days 02:00:00\n2023-10-29 03:20:00 | 2023-10-29 01:20:00 | 0 days 03:00:00\n2023-10-30 03:20:00 | 2023-10-30 01:20:00 | 0 days 02:00:00\n```\n::\n::\nsubtract_duration_in_timezone(duration, timezone)\nSubtracts Duration from DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be subtracted from DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform subtraction in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an 
object the method was called on\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport datetime\nt1 = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-03-26T03:23:00\n 2 | 2023-03-27T03:23:00\n 3 | 2023-10-29T03:23:00\n 4 | 2023-10-30T03:23:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(date=pw.this.date.dt.strptime(fmt=fmt))\nt3 = t2.with_columns(\n new_date=pw.this.date.dt.subtract_duration_in_timezone(\n datetime.timedelta(hours=2), timezone=\"Europe/Warsaw\"\n ),\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\ndate | new_date\n2023-03-26 03:23:00 | 2023-03-26 00:23:00\n2023-03-27 03:23:00 | 2023-03-27 01:23:00\n2023-10-29 03:23:00 | 2023-10-29 02:23:00\n2023-10-30 03:23:00 | 2023-10-30 01:23:00\n```\n::\n::\ntimestamp()\nReturns a number of nanoseconds from 1970-01-01 for naive DateTime\nand from 1970-01-01 UTC for timezone-aware datetime.\n* Returns\n Timestamp as int.\nExamples:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions 
API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 0 | 1969-01-01T00:00:00.000000000\n 1 | 1970-01-01T00:00:00.000000000\n 2 | 2023-01-01T00:00:00.000000000\n 3 | 2023-03-25T00:00:00.000000000\n 4 | 2023-03-25T13:45:26.000000000\n 5 | 2023-03-25T13:45:26.987654321\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f\"))\ntable_with_timestamp = table_with_datetime.select(\n timestamp=table_with_datetime.t1.dt.timestamp()\n)\npw.debug.compute_and_print(table_with_timestamp, include_id=False)\n```\n::\nResult\n```\ntimestamp\n-31536000000000000\n0\n1672531200000000000\n1679702400000000000\n1679751926000000000\n1679751926987654321\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = 
table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1969-01-01T00:00:00.000000000+00:00\n 2 | 1970-01-01T00:00:00.000000000+00:00\n 3 | 1970-01-01T00:00:00.000000000+02:00\n 4 | 1970-01-01T00:00:00.000000000-03:00\n 5 | 2023-01-01T00:00:00.000000000+01:00\n 6 | 2023-03-25T00:00:00.000000000+01:00\n 7 | 2023-03-25T13:45:26.000000000+01:00\n 8 | 2023-03-25T13:45:26.987654321+01:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S.%f%z\"))\ntable_with_timestamp = table_with_datetime.select(\n timestamp=table_with_datetime.t1.dt.timestamp()\n)\npw.debug.compute_and_print(table_with_timestamp, include_id=False)\n```\n::\nResult\n```\ntimestamp\n-31536000000000000\n-7200000000000\n0\n10800000000000\n1672527600000000000\n1679698800000000000\n1679748326000000000\n1679748326987654321\n```\n::\n::\nto_naive_in_timezone(timezone)\nConverts DateTimeUtc to time zone specified as timezone argument.\n* Parameters\n timezone (`ColumnExpression` | `str`) \u2013 The time zone to convert to.\n* Returns\n DateTimeNaive\nExamples:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 
2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | date_utc\n 1 | 2023-03-26T00:59:00+00:00\n 2 | 2023-03-26T01:00:00+00:00\n 3 | 2023-03-27T00:59:00+00:00\n 4 | 2023-03-27T01:00:00+00:00\n 5 | 2023-10-28T23:59:00+00:00\n 6 | 2023-10-29T00:00:00+00:00\n 7 | 2023-10-29T00:30:00+00:00\n 8 | 2023-10-29T01:00:00+00:00\n 9 | 2023-10-29T01:30:00+00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S%z\"\ntable_utc = table.select(date_utc=pw.this.date_utc.dt.strptime(fmt=fmt))\ntable_local = table_utc.with_columns(\n date=pw.this.date_utc.dt.to_naive_in_timezone(timezone=\"Europe/Warsaw\"),\n)\npw.debug.compute_and_print(table_local, include_id=False)\n```\n::\nResult\n```\ndate_utc | date\n2023-03-26 00:59:00+00:00 | 2023-03-26 01:59:00\n2023-03-26 01:00:00+00:00 | 2023-03-26 03:00:00\n2023-03-27 00:59:00+00:00 | 2023-03-27 02:59:00\n2023-03-27 01:00:00+00:00 | 2023-03-27 03:00:00\n2023-10-28 23:59:00+00:00 | 2023-10-29 01:59:00\n2023-10-29 00:00:00+00:00 | 2023-10-29 02:00:00\n2023-10-29 00:30:00+00:00 | 2023-10-29 02:30:00\n2023-10-29 01:00:00+00:00 | 2023-10-29 02:00:00\n2023-10-29 01:30:00+00:00 | 2023-10-29 02:30:00\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt 
attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | date_utc\n 1 | 2023-03-12T09:59:00+00:00\n 2 | 2023-03-12T10:00:00+00:00\n 3 | 2023-03-13T09:59:00+00:00\n 4 | 2023-03-13T10:00:00+00:00\n 5 | 2023-11-05T07:59:00+00:00\n 6 | 2023-11-05T08:00:00+00:00\n 7 | 2023-11-05T08:30:00+00:00\n 8 | 2023-11-05T09:00:00+00:00\n 9 | 2023-11-05T09:30:00+00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S%z\"\ntable_utc = table.select(date_utc=pw.this.date_utc.dt.strptime(fmt=fmt))\ntable_local = table_utc.with_columns(\n date=pw.this.date_utc.dt.to_naive_in_timezone(timezone=\"America/Los_Angeles\"),\n)\npw.debug.compute_and_print(table_local, include_id=False)\n```\n::\nResult\n```\ndate_utc | date\n2023-03-12 09:59:00+00:00 | 2023-03-12 01:59:00\n2023-03-12 10:00:00+00:00 | 2023-03-12 03:00:00\n2023-03-13 09:59:00+00:00 | 2023-03-13 02:59:00\n2023-03-13 10:00:00+00:00 | 2023-03-13 03:00:00\n2023-11-05 07:59:00+00:00 | 2023-11-05 00:59:00\n2023-11-05 08:00:00+00:00 | 2023-11-05 01:00:00\n2023-11-05 08:30:00+00:00 | 2023-11-05 01:30:00\n2023-11-05 09:00:00+00:00 | 2023-11-05 01:00:00\n2023-11-05 09:30:00+00:00 | 2023-11-05 01:30:00\n```\n::\n::\nto_utc(from_timezone)\nConverts DateTimeNaive to UTC from time zone provided as from_timezone\nargument. 
If the given DateTime doesn\u2019t exist in the provided time zone it is\nmapped to the first existing DateTime after it. If a given DateTime corresponds\nto more than one moments in the provided time zone, it is mapped to a later\nmoment.\n* Parameters\n from_timezone (`ColumnExpression` | `str`) \u2013 The time zone to convert from.\n* Returns\n DateTimeUtc\nExamples:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-03-26T01:59:00\n 2 | 2023-03-26T02:30:00\n 3 | 2023-03-26T03:00:00\n 4 | 2023-03-27T01:59:00\n 5 | 2023-03-27T02:30:00\n 6 | 2023-03-27T03:00:00\n 7 | 2023-10-29T01:59:00\n 8 | 2023-10-29T02:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_local = table.select(date=pw.this.date.dt.strptime(fmt=fmt))\ntable_utc = table_local.with_columns(\n date_utc=pw.this.date.dt.to_utc(from_timezone=\"Europe/Warsaw\"),\n)\npw.debug.compute_and_print(table_utc, include_id=False)\n```\n::\nResult\n```\ndate | date_utc\n2023-03-26 01:59:00 | 2023-03-26 
00:59:00+00:00\n2023-03-26 02:30:00 | 2023-03-26 01:00:00+00:00\n2023-03-26 03:00:00 | 2023-03-26 01:00:00+00:00\n2023-03-27 01:59:00 | 2023-03-26 23:59:00+00:00\n2023-03-27 02:30:00 | 2023-03-27 00:30:00+00:00\n2023-03-27 03:00:00 | 2023-03-27 01:00:00+00:00\n2023-10-29 01:59:00 | 2023-10-28 23:59:00+00:00\n2023-10-29 02:00:00 | 2023-10-29 01:00:00+00:00\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-03-12T01:59:00\n 2 | 2023-03-12T02:30:00\n 3 | 2023-03-12T03:00:00\n 4 | 2023-03-13T01:59:00\n 5 | 2023-03-13T02:30:00\n 6 | 2023-03-13T03:00:00\n 7 | 2023-11-05T00:59:00\n 8 | 2023-11-05T01:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_local = table.select(date=pw.this.date.dt.strptime(fmt=fmt))\ntable_utc = table_local.with_columns(\n date_utc=pw.this.date.dt.to_utc(from_timezone=\"America/Los_Angeles\"),\n)\npw.debug.compute_and_print(table_utc, include_id=False)\n```\n::\nResult\n```\ndate | date_utc\n2023-03-12 01:59:00 | 2023-03-12 09:59:00+00:00\n2023-03-12 02:30:00 | 
2023-03-12 10:00:00+00:00\n2023-03-12 03:00:00 | 2023-03-12 10:00:00+00:00\n2023-03-13 01:59:00 | 2023-03-13 08:59:00+00:00\n2023-03-13 02:30:00 | 2023-03-13 09:30:00+00:00\n2023-03-13 03:00:00 | 2023-03-13 10:00:00+00:00\n2023-11-05 00:59:00 | 2023-11-05 07:59:00+00:00\n2023-11-05 01:00:00 | 2023-11-05 09:00:00+00:00\n```\n::\n::\nweekday()\nConverts a DateTime to an int representing its day of the week, where 0 denotes\na Monday, and 6 denotes a Sunday.\n* Returns\n int\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1970-02-03T10:13:00\n 2 | 2023-03-25T10:13:00\n 3 | 2023-03-26T12:13:00\n 4 | 2023-05-15T14:13:23\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetime = table.select(t1=pw.this.t1.dt.strptime(fmt=fmt))\ntable_with_dayofweek = table_with_datetime.with_columns(weekday=pw.this.t1.dt.weekday())\npw.debug.compute_and_print(table_with_dayofweek, include_id=False)\n```\n::\nResult\n```\nt1 | weekday\n1970-02-03 10:13:00 | 1\n2023-03-25 10:13:00 | 
5\n2023-03-26 12:13:00 | 6\n2023-05-15 14:13:23 | 0\n```\n::\n::\nweeks()\nThe total number of weeks in a Duration.\n* Returns\n Weeks as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-03-15T00:00:00 | 2023-05-15T10:13:23\n 1 | 2023-04-15T00:00:00 | 2023-05-15T10:00:00\n 2 | 2023-05-01T10:00:00 | 2023-05-15T10:00:00\n 3 | 2023-05-15T10:00:00 | 2023-05-15T09:00:00\n 4 | 2023-05-15T10:00:00 | 2023-05-15T11:00:00\n 5 | 2023-05-16T12:13:00 | 2023-05-15T10:00:00\n 6 | 2024-05-15T14:13:23 | 2023-05-15T10:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_weeks = table_with_diff.select(weeks=pw.this[\"diff\"].dt.weeks())\npw.debug.compute_and_print(table_with_weeks, include_id=False)\n```\n::\nResult\n```\nweeks\n-8\n-4\n-2\n0\n0\n0\n52\n```\n::\n::\nyear()\nExtracts 
year from a DateTime.\n* Returns\n Year as int.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1974-03-12T00:00:00\n 2 | 2023-03-25T12:00:00\n 3 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_years = table_with_datetime.select(year=table_with_datetime.t1.dt.year())\npw.debug.compute_and_print(table_with_years, include_id=False)\n```\n::\nResult\n```\nyear\n1974\n2023\n2023\n```\n::\n::\nclass pw.NumericalNamespace(expression)\nA module containing methods related to numbers.\nThey can be called using a num attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | v\n 1 | -1\n'''\n)\ntable_abs = table.select(v_abs=table.v.num.abs())\n```\nabs()\nReturns the absolute value from a numerical value.\n* Returns\n Absolute value as float\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# 
Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | v\n 1 | 1\n 2 | -1\n 3 | 2.5\n 4 | -2.5\n'''\n)\ntable_abs = table.select(v_abs=table.v.num.abs())\npw.debug.compute_and_print(table_abs, include_id=False)\n```\n::\nResult\n```\nv_abs\n1.0\n1.0\n2.5\n2.5\n```\n::\n::\nfill_na(default_value)\nFill the missing values (None or NaN) in a column of a table with a specified default value.\n* Parameters\n default_value (*float*) \u2013 The value to fill in for the missing values.\n* Returns\n A new column with the missing values filled with the specified default value.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = 
table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | v\n 1 | 1\n 2 | 2.0\n 3 | None\n 4 | 3.5\n'''\n)\ntable_fill_na = table.select(v_filled=table.v.num.fill_na(0))\npw.debug.compute_and_print(table_fill_na, include_id=False)\n```\n::\nResult\n```\nv_filled\n0.0\n1.0\n2.0\n3.5\n```\n::\n::\nround(decimals=0)\nRound the values in a column of a table to the specified number of decimals.\n* Parameters\n * decimals (`ColumnExpression` | `int`) \u2013 The number of decimal places to round to. It can be either an\n * 0. (*integer or a reference to another column. 
Defaults to*) \u2013 \n* Returns\n A new column with the values rounded to the specified number of decimals.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | v\n 1 | -2.18\n 2 | -1.11\n 3 | 1\n 4 | 2.1\n 5 | 3.14\n 6 | 4.17\n'''\n)\ntable_round = table.select(v_round=table.v.num.round(1))\npw.debug.compute_and_print(table_round, include_id=False)\n```\n::\nResult\n```\nv_round\n-2.2\n-1.1\n1.0\n2.1\n3.1\n4.2\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = 
table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | v | precision\n 1 | 3 | 0\n 2 | 3.1 | 1\n 3 | 3.14 | 1\n 4 | 3.141 | 2\n 5 | 3.1415 | 2\n'''\n)\ntable_round = table.select(v_round=table.v.num.round(pw.this.precision))\npw.debug.compute_and_print(table_round, include_id=False)\n```\n::\nResult\n```\nv_round\n3.0\n3.1\n3.1\n3.14\n3.14\n```\n::\n::\nclass pw.StringNamespace(expression)\nA module containing methods related to string.\nThey can be called using a str attribute of an expression.\nTypical use:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport 
pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | ALICE\n'''\n)\ntable += table.select(name_lower=table.name.str.lower())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_lower\nALICE | alice\n```\n::\n::\ncount(sub, start=None, end=None)\nReturns the number of non-overlapping occurrences of substring sub in the range \\[start, end).\nOptional arguments start and end are interpreted as in slice notation.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Hello\n 3 | World\n 4 | Zoo\n'''\n)\ntable += table.select(count=table.name.str.count(\"o\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | count\nAlice | 0\nHello | 1\nWorld | 1\nZoo | 2\n```\n::\n::\nendswith(suffix)\nReturns True if the string ends with suffix.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass 
pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(ends_with_e=table.name.str.endswith(\"e\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | ends_with_e\nAlice | True\nBob | False\nCAROLE | False\ndavid | False\n```\n::\n::\nfind(sub, start=None, end=None)\nReturn the lowest index in the string where substring sub is found within\nthe slice s\\[start:end\\]. Optional arguments start and end are interpreted as in\nslice notation. 
Return -1 if sub is not found.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Hello\n 3 | World\n 4 | Zoo\n'''\n)\ntable += table.select(pos=table.name.str.find(\"o\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | pos\nAlice | -1\nHello | 4\nWorld | 1\nZoo | 1\n```\n::\n::\nlen()\nReturns the length of a string.\n* Returns\n Length of the string\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = 
table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(length=table.name.str.len())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | length\nAlice | 5\nBob | 3\nCAROLE | 6\ndavid | 5\n```\n::\n::\nlower()\nReturns a lowercase copy of a string.\n* Returns\n Lowercase string\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | 
david\n'''\n)\ntable += table.select(name_lower=table.name.str.lower())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_lower\nAlice | alice\nBob | bob\nCAROLE | carole\ndavid | david\n```\n::\n::\nparse_bool(true_values=['on', 'true', 'yes', '1'], false_values=['off', 'false', 'no', '0'], optional=False)\nParses the string to bool, by checking if given string is either in\ntrue_values or false_values. The given string and all values in true_vales and\nfalse_values are made lowercase, so parsing is case insensitive.\nWhen true_values and false_values arguments are\nnot provided, strings \u201cTrue\u201d, \u201cOn\u201d, \u201c1\u201d and \u201cYes\u201d are interpreted as True value,\nand \u201cFalse\u201d, \u201cOff\u201d, \u201c0\u201d, and \u201cNo\u201d are interpreted as False.\nIf true_values or false_values is provided, then these values are mapped to\nrespectively True and False, while all other either raise an exception or return\nNone, depending on argument optional.\nIf optional argument is set to True, then the\nreturn type is Optional\\[bool\\] and if some string cannot be parsed, None is\nreturned.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time 
zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\ndf = pd.DataFrame({\"a\": [\"0\", \"TRUE\", \"on\"]}, dtype=str)\ntable = pw.debug.table_from_pandas(df)\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\na\n0\nTRUE\non\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = 
table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = table.select(a=table.a.str.parse_bool())\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\na\nFalse\nTrue\nTrue\n```\n::\n::\nparse_float(optional=False)\nParses the string to float. 
If optional argument is set to True, then the\nreturn type is Optional\\[float\\] and if some string cannot be parsed, None is\nreturned.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\ndf = pd.DataFrame({\"a\": [\"-5\", \"0.1\", \"200.999\"]}, dtype=str)\ntable = pw.debug.table_from_pandas(df)\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account 
time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = table.select(a=table.a.str.parse_float())\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\na\n-5.0\n0.1\n200.999\n```\n::\n::\nparse_int(optional=False)\nParses the string to int. 
If optional argument is set to True, then the\nreturn type is Optional\\[int\\] and if some string cannot be parsed, None is\nreturned.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\ndf = pd.DataFrame({\"a\": [\"-5\", \"0\", \"200\"]}, dtype=str)\ntable = pw.debug.table_from_pandas(df)\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time 
zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = table.select(a=table.a.str.parse_int())\ntable.typehints()\n```\n::\nResult\n```\nmappingproxy({'a': })\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\na\n-5\n0\n200\n```\n::\n::\nremoveprefix(prefix, /)\nIf the string starts with prefix, returns a copy of the string without the prefix.\nOtherwise returns the original string.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = 
pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(without_da=table.name.str.removeprefix(\"da\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | without_da\nAlice | Alice\nBob | Bob\nCAROLE | CAROLE\ndavid | vid\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called 
on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | note | prefix\n 1 | AAA | A\n 2 | BB | B\n'''\n)\ntable = table.select(\n pw.this.note,\n new_note=pw.this.note.str.removeprefix(pw.this.prefix)\n)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nnote | new_note\nAAA | AA\nBB | B\n```\n::\n::\nremovesuffix(suffix, /)\nIf the string ends with suffix, returns a copy of the string without the suffix.\nOtherwise returns the original string.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(without_LE=table.name.str.removesuffix(\"LE\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | without_LE\nAlice | Alice\nBob | Bob\nCAROLE | CARO\ndavid | david\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing 
methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | fruit | suffix\n 1 | bamboo | o\n 2 | banana | na\n'''\n)\ntable = table.select(\n pw.this.fruit,\n fruit_cropped=pw.this.fruit.str.removesuffix(pw.this.suffix)\n)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nfruit | fruit_cropped\nbamboo | bambo\nbanana | bana\n```\n::\n::\nreplace(old_value, new_value, count=-1, /)\nReturns the a string where the occurrences of the old_value substrings are\n replaced by the new_value substring.\n* Parameters\n count (`ColumnExpression` | `int`) \u2013 Maximum number of occurrences to replace. When set to -1, replaces\n all occurrences. 
Defaults to -1.\n* Returns\n The new string where old_value is replaced by new_value\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n 5 | Edward\n'''\n)\ntable += table.select(name_replace=table.name.str.replace(\"d\",\"Z\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_replace\nAlice | Alice\nBob | Bob\nCAROLE | CAROLE\nEdward | EZwarZ\ndavid | ZaviZ\n```\n::\n::\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = 
table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\ntable = pw.debug.table_from_markdown(\n '''\n | value | old | new | count\n 1 | Scaciscics | c | t | 3\n 2 | yelliwwiid | i | o | 2\n'''\n)\ntable = table.select(\n pw.this.value,\n value_replace=pw.this.value.str.replace(\n pw.this.old, pw.this.new, pw.this.count\n )\n)\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nvalue | value_replace\nScaciscics | Statistics\nyelliwwiid | yellowwoid\n```\n::\n::\nreversed()\nReturns a reverse copy of a string.\n* Returns\n Reverse string\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as 
pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(name_reverse=table.name.str.reversed())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_reverse\nAlice | ecilA\nBob | boB\nCAROLE | ELORAC\ndavid | divad\n```\n::\n::\nrfind(sub, start=None, end=None)\nReturn the highest index in the string where substring sub is found within\nthe slice s\\[start:end\\]. Optional arguments start and end are interpreted as in\nslice notation. Return -1 if sub is not found.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Hello\n 3 | World\n 4 | Zoo\n'''\n)\ntable += table.select(pos=table.name.str.rfind(\"o\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | pos\nAlice | -1\nHello | 4\nWorld | 1\nZoo | 2\n```\n::\n::\nslice(start, end, /)\nReturn a slice of the string.\nExample:\n"} -{"doc": "---\ntitle: Expressions 
API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(slice=table.name.str.slice(1,4))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | slice\nAlice | lic\nBob | ob\nCAROLE | ARO\ndavid | avi\n```\n::\n::\nstartswith(prefix)\nReturns True if the string starts with prefix.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account 
time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(starts_with_A=table.name.str.startswith(\"A\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | starts_with_A\nAlice | True\nBob | False\nCAROLE | False\ndavid | False\n```\n::\n::\nstrip(chars=None)\nReturns a copy of the string with specified leading and trailing characters\nremoved. If no arguments are passed, remove the leading and trailing whitespaces.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += 
table.select(name_strip=table.name.str.strip(\"Aod\"))\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_strip\nAlice | lice\nBob | Bob\nCAROLE | CAROLE\ndavid | avi\n```\n::\n::\nswapcase()\nReturns a copy of the string where the case is inverted.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(name_swap=table.name.str.swapcase())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_swap\nAlice | aLICE\nBob | bOB\nCAROLE | carole\ndavid | DAVID\n```\n::\n::\ntitle()\nReturns a copy of the string where where words start with an uppercase character\nand the remaining characters are lowercase.\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt 
attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | col\n 1 | title\n'''\n)\ntable = table.select(col_title=table[\"col\"].str.title())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\ncol_title\nTitle\n```\n::\n::\nupper()\nReturns a uppercase copy of a string.\n* Returns\n Uppercase string\nExample:\n"} -{"doc": "---\ntitle: Expressions API\nsidebar: 'API'\nnavigation: true\n---\n# Expressions API\nclass pw.DateTimeNamespace(expression)\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\nTypical use:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n* Parameters\n * duration (`ColumnExpression` | `Timedelta`) \u2013 Duration to be added to DateTime.\n * timezone (`ColumnExpression` | `str`) \u2013 The time zone to perform addition in.\n* Returns\n DateTimeNaive or DateTimeUtc depending 
on the type of an object the method was called on\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | name\n 1 | Alice\n 2 | Bob\n 3 | CAROLE\n 4 | david\n'''\n)\ntable += table.select(name_upper=table.name.str.upper())\npw.debug.compute_and_print(table, include_id=False)\n```\n::\nResult\n```\nname | name_upper\nAlice | ALICE\nBob | BOB\nCAROLE | CAROLE\ndavid | DAVID\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(10).as_integer_ratio()\n```\n::\nResult\n```\n(10, 1)\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(-10).as_integer_ratio()\n```\n::\nResult\n```\n(-10, 1)\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, 
boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(0).as_integer_ratio()\n```\n::\nResult\n```\n(0, 1)\n```\n::\n::\nbit_count()\nNumber of ones in the binary representation of the absolute value of self.\nAlso known as the population count.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\nbin(13)\n```\n::\nResult\n```\n'0b1101'\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(13).bit_count()\n```\n::\nResult\n```\n3\n```\n::\n::\nbit_length()\nNumber of bits necessary to represent self in binary.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, 
boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\nbin(37)\n```\n::\nResult\n```\n'0b100101'\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(37).bit_length()\n```\n::\nResult\n```\n6\n```\n::\n::\nconjugate()\nReturns self, the complex conjugate of any int.\ndenominator()\nthe denominator of a rational number in lowest terms\nfrom_bytes(byteorder='big', *, signed=False)\nReturn the integer represented by the given array of bytes.\nbytes\n Holds the array of bytes to convert. The argument must either\n support the buffer protocol or be an iterable object producing bytes.\n Bytes and bytearray are examples of built-in objects that support the\n buffer protocol.\nbyteorder\n The byte order used to represent the integer. If byteorder is \u2018big\u2019,\n the most significant byte is at the beginning of the byte array. If\n byteorder is \u2018little\u2019, the most significant byte is at the end of the\n byte array. To request the native byte order of the host system, use\n ```\n `\n ```\n sys.byteorder\u2019 as the byte order value. 
Default is to use \u2018big\u2019.\nsigned\n Indicates whether two\u2019s complement is used to represent the integer.\nimag()\nthe imaginary part of a complex number\nnumerator()\nthe numerator of a rational number in lowest terms\nreal()\nthe real part of a complex number\nto_bytes(length=1, byteorder='big', *, signed=False)\nReturn an array of bytes representing an integer.\nlength\n Length of bytes object to use. An OverflowError is raised if the\n integer is not representable with the given number of bytes. Default\n is length 1.\nbyteorder\n The byte order used to represent the integer. If byteorder is \u2018big\u2019,\n the most significant byte is at the beginning of the byte array. If\n byteorder is \u2018little\u2019, the most significant byte is at the end of the\n byte array. To request the native byte order of the host system, use\n ```\n `\n ```\n sys.byteorder\u2019 as the byte order value. Default is to use \u2018big\u2019.\nsigned\n Determines whether two\u2019s complement is used to represent the integer.\n If signed is False and a negative integer is given, an OverflowError\n is raised.\nclass pw.ml.smart_table_ops.FuzzyJoinNormalization(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive 
denominator.\n```python\n(10).as_integer_ratio()\n```\n::\nResult\n```\n(10, 1)\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(-10).as_integer_ratio()\n```\n::\nResult\n```\n(-10, 1)\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(0).as_integer_ratio()\n```\n::\nResult\n```\n(0, 1)\n```\n::\n::\nbit_count()\nNumber of ones in the binary representation of the absolute value of self.\nAlso known as the population count.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive 
denominator.\n```python\nbin(13)\n```\n::\nResult\n```\n'0b1101'\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(13).bit_count()\n```\n::\nResult\n```\n3\n```\n::\n::\nbit_length()\nNumber of bits necessary to represent self in binary.\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\nbin(37)\n```\n::\nResult\n```\n'0b100101'\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.stdlib.ml.smart_table_ops package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.ml.smart_table_ops package\nclass pw.ml.smart_table_ops.Edge()\nclass pw.ml.smart_table_ops.Feature()\nclass pw.ml.smart_table_ops.FuzzyJoinFeatureGeneration(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nas_integer_ratio()\nReturn integer ratio.\nReturn a pair of integers, whose ratio is exactly equal to the original int\nand with a positive denominator.\n```python\n(37).bit_length()\n```\n::\nResult\n```\n6\n```\n::\n::\nconjugate()\nReturns self, the complex conjugate of any int.\ndenominator()\nthe 
denominator of a rational number in lowest terms\nfrom_bytes(byteorder='big', *, signed=False)\nReturn the integer represented by the given array of bytes.\nbytes\n Holds the array of bytes to convert. The argument must either\n support the buffer protocol or be an iterable object producing bytes.\n Bytes and bytearray are examples of built-in objects that support the\n buffer protocol.\nbyteorder\n The byte order used to represent the integer. If byteorder is \u2018big\u2019,\n the most significant byte is at the beginning of the byte array. If\n byteorder is \u2018little\u2019, the most significant byte is at the end of the\n byte array. To request the native byte order of the host system, use\n ```\n `\n ```\n sys.byteorder\u2019 as the byte order value. Default is to use \u2018big\u2019.\nsigned\n Indicates whether two\u2019s complement is used to represent the integer.\nimag()\nthe imaginary part of a complex number\nnumerator()\nthe numerator of a rational number in lowest terms\nreal()\nthe real part of a complex number\nto_bytes(length=1, byteorder='big', *, signed=False)\nReturn an array of bytes representing an integer.\nlength\n Length of bytes object to use. An OverflowError is raised if the\n integer is not representable with the given number of bytes. Default\n is length 1.\nbyteorder\n The byte order used to represent the integer. If byteorder is \u2018big\u2019,\n the most significant byte is at the beginning of the byte array. If\n byteorder is \u2018little\u2019, the most significant byte is at the end of the\n byte array. To request the native byte order of the host system, use\n ```\n `\n ```\n sys.byteorder\u2019 as the byte order value. 
Default is to use \u2018big\u2019.\nsigned\n Determines whether two\u2019s complement is used to represent the integer.\n If signed is False and a negative integer is given, an OverflowError\n is raised.\nclass pw.ml.smart_table_ops.JoinResult()\nclass pw.ml.smart_table_ops.Node()\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice 1\n2 9 Bob 1\n3 8 Alice 2\n''')\nt2 = pw.debug.table_from_markdown('''\n age owner pet size\n11 10 Alice 3 M\n12 9 Bob 1 L\n13 8 Tom 1 XL\n''')\njoinresult= t1.join(t2, t1.pet == t2.pet, t1.owner == t2.owner) # noqa: E501\nisinstance(joinresult, pw.JoinResult)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\npw.debug.compute_and_print(joinresult.select(t1.age, t2.size), include_id=False)\n```\n::\nResult\n```\nage | size\n9 | L\n```\n::\n::\nproperty C(: ColumnNamespace )\nReturns the namespace of all the columns of a joinable.\nAllows accessing column names that might otherwise be a reserved methods.\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join 
API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\ntab = pw.debug.table_from_markdown('''\nage | owner | pet | filter\n10 | Alice | dog | True\n9 | Bob | dog | True\n8 | Alice | cat | False\n7 | Bob | dog | True\n''')\nisinstance(tab.C.age, pw.ColumnReference)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\npw.debug.compute_and_print(tab.filter(tab.C.filter), include_id=False)\n```\n::\nResult\n```\nage | owner | pet | filter\n7 | Bob | dog | True\n9 | Bob | dog | True\n10 | Alice | dog | True\n```\n::\n::\nfilter(filter_expression)\nFilters rows, keeping the ones satisfying the predicate.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice 1\n2 9 
Bob 1\n3 8 Alice 2\n''')\nt2 = pw.debug.table_from_markdown('''\n age owner pet size\n11 10 Alice 3 M\n12 9 Bob 1 L\n13 8 Tom 1 XL\n''')\nresult = t1.join(t2).filter(t1.owner == t2.owner).select(t1.age, t2.size) # noqa: E501\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nage | size\n8 | M\n9 | L\n10 | M\n```\n::\n::\ngroupby(*args, id=None)\nGroups join result by columns from args.\nNOTE: Usually followed by .reduce() that aggregates the result and returns a table.\n* Parameters\n * args (`ColumnReference`) \u2013 columns to group by.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 if provided, is the column used to set id\u2019s of the rows of the result\n* Returns\n *GroupedJoinResult* \u2013 Groupby object.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n cost owner pet\n1 100 Alice 1\n2 90 Bob 1\n3 80 Alice 2\n''')\nt2 = pw.debug.table_from_markdown('''\n cost owner pet size\n11 100 Alice 3 M\n12 90 Bob 1 L\n13 80 Tom 1 XL\n''')\nresult = (t1.join(t2, t1.owner==t2.owner).groupby(pw.this.owner)\n .reduce(pw.this.owner, pairs = pw.reducers.count()))\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nowner | pairs\nAlice | 2\nBob | 1\n```\n::\n::\njoin(other, *on, id=None, how=JoinMode.INNER)\nJoin self with other using the given join expression.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n * how (`JoinMode`) \u2013 by default, inner join is performed. Possible values are JoinMode.{INNER,LEFT,RIGHT,OUTER}\n correspond to inner, left, right and outer join respectively.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(\n t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER\n).select(age=t1.age, owner_name=t2.owner, size=t2.size)\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_inner(other, *on, id=None)\nInner-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER).select(age=t1.age, owner_name=t2.owner, size=t2.size) # noqa: E501\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_left(other, *on, id=None)\nLeft-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks:\nargs cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on 
the right are replaced with None\n- rows from the right side that were not matched with the left side are skipped\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_left(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_outer(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on the right are replaced with None\n- for rows from the right side that 
were not matched with the left side,\nmissing values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_outer(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t1.id, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_right(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- rows from the left side that were not matched with the right side are skipped\n- for rows from the right side that were not matched with the left side,\nmissing 
values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_right(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(pw.coalesce(t1.b,0) + t2.d,t1.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n```\n::\n::\n* Returns\n OuterJoinResult object\npromise_universe_is_equal_to(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the others.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n None\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, 
_join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nimport pytest\nt1 = pw.debug.table_from_markdown(\n '''\n | age | owner | pet\n1 | 8 | Alice | cat\n2 | 9 | Bob | dog\n3 | 15 | Alice | tortoise\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | age | owner\n1 | 11 | Alice\n2 | 12 | Tom\n3 | 7 | Eve\n'''\n)\nt3 = t2.filter(pw.this.age > 10)\nwith pytest.raises(\n ValueError,\n match='Universe of the argument of Table.update_cells\\(\\) needs ' # noqa\n + 'to be a subset of the universe of the updated table.',\n):\n t1.update_cells(t3)\nt1 = t1.promise_universe_is_equal_to(t2)\nresult = t1.update_cells(t3)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n11 | Alice | cat\n12 | Tom | dog\n15 | Alice | tortoise\n```\n::\n::\npromise_universe_is_subset_of(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the other.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 30\n''').promise_universe_is_subset_of(t1)\nt3 = t1 << t2\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 
30\n```\n::\n::\npromise_universes_are_disjoint(other)\nAsserts to Pathway that an universe of self is disjoint from universe of other.\nSemantics: Used in situations where Pathway cannot deduce universes are disjoint.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n11 | 11 | Alice | 30\n12 | 12 | Tom | 40\n''').promise_universes_are_disjoint(t1)\nt3 = t1.concat(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n11 | Alice | 30\n12 | Tom | 40\n```\n::\n::\nreduce(*args, kwargs)\nReduce a join result to a single row.\nEquivalent to self.groupby().reduce(\\*args, \\*\\*kwargs).\n* Parameters\n * args (`ColumnReference`) \u2013 reducer to reduce the table with\n * kwargs (`ColumnExpression`) \u2013 reducer to reduce the table with. 
Its key is the new name of a column.\n* Returns\n *Table* \u2013 Reduced table.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n cost owner pet\n1 100 Alice 1\n2 90 Bob 1\n3 80 Alice 2\n''')\nt2 = pw.debug.table_from_markdown('''\n cost owner pet size\n11 100 Alice 3 M\n12 90 Bob 1 L\n13 80 Tom 1 XL\n''')\nresult = t1.join(t2, t1.owner==t2.owner).reduce(total_pairs = pw.reducers.count())\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\ntotal_pairs\n3\n```\n::\n::\nselect(*args, kwargs)\nComputes result of a join.\n* Parameters\n * args (`ColumnReference`) \u2013 Column references.\n * kwargs (`Any`) \u2013 Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, t1.pet == 
t2.pet, t1.owner == t2.owner).select(age=t1.age, owner_name=t2.owner, size=t2.size) # noqa: E501\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\nclass pw.Joinable(context)\nproperty C(: ColumnNamespace )\nReturns the namespace of all the columns of a joinable.\nAllows accessing column names that might otherwise be a reserved methods.\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\ntab = pw.debug.table_from_markdown('''\nage | owner | pet | filter\n10 | Alice | dog | True\n9 | Bob | dog | True\n8 | Alice | cat | False\n7 | Bob | dog | True\n''')\nisinstance(tab.C.age, pw.ColumnReference)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\npw.debug.compute_and_print(tab.filter(tab.C.filter), include_id=False)\n```\n::\nResult\n```\nage | owner | pet | filter\n7 | Bob | dog | True\n9 | Bob | dog | True\n10 | Alice | dog | True\n```\n::\n::\njoin(other, *on, id=None, how=JoinMode.INNER)\nJoin self with other using the given join expression.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the 
join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n * how (`JoinMode`) \u2013 by default, inner join is performed. Possible values are JoinMode.{INNER,LEFT,RIGHT,OUTER}\n correspond to inner, left, right and outer join respectively.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(\n t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER\n).select(age=t1.age, owner_name=t2.owner, size=t2.size)\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_inner(other, *on, id=None)\nInner-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 the right side of the join.\n * on (`ColumnExpression`) \u2013 a list of column expressions. 
Each must have == as the top level operation\n and be of the form LHS: ColumnReference == RHS: ColumnReference.\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional argument for id of result, can be only self.id or other.id\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, t1.pet == t2.pet, t1.owner == t2.owner, how=pw.JoinMode.INNER).select(age=t1.age, owner_name=t2.owner, size=t2.size) # noqa: E501\npw.debug.compute_and_print(t3, include_id = False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\njoin_left(other, *on, id=None)\nLeft-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks:\nargs cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on 
the right are replaced with None\n- rows from the right side that were not matched with the left side are skipped\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_left(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_outer(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- for rows from the left side that were not matched with the right side,\nmissing values on the right are replaced with None\n- for rows from the right side that 
were not matched with the left side,\nmissing values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_outer(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(t1.b + t2.d, t1.id, t2.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n13 | |\n13 | |\n```\n::\n::\njoin_right(other, *on, id=None)\nOuter-joins two tables or join results.\n* Parameters\n * other (`Joinable`) \u2013 Table or join result.\n * \\*on (`ColumnExpression`) \u2013 Columns to join, syntax self.col1 == other.col2\n * id (`Optional`\\[`ColumnReference`\\]) \u2013 optional id column of the result\nRemarks: args cannot contain id column from either of tables, as the result table has id column with auto-generated ids; it can be selected by assigning it to a column with defined name (passed in kwargs)\nBehavior:\n- rows from the left side that were not matched with the right side are skipped\n- for rows from the right side that were not matched with the left side,\nmissing 
values on the left are replaced with None\n- for rows that were matched the behavior is the same as that of an inner join.\n* Returns\n *JoinResult* \u2013 an object on which .select() may be called to extract relevant\n columns from the result of the join.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b\n 1 | 11 | 111\n 2 | 12 | 112\n 3 | 13 | 113\n 4 | 13 | 114\n '''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | c | d\n 1 | 11 | 211\n 2 | 12 | 212\n 3 | 14 | 213\n 4 | 14 | 214\n '''\n)\npw.debug.compute_and_print(t1.join_right(t2, t1.a == t2.c\n).select(t1.a, t2_c=t2.c, s=pw.require(pw.coalesce(t1.b,0) + t2.d,t1.id)),\ninclude_id=False)\n```\n::\nResult\n```\na | t2_c | s\n | 14 |\n | 14 |\n11 | 11 | 322\n12 | 12 | 324\n```\n::\n::\n* Returns\n OuterJoinResult object\npromise_universe_is_equal_to(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the others.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n None\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, 
_join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nimport pytest\nt1 = pw.debug.table_from_markdown(\n '''\n | age | owner | pet\n1 | 8 | Alice | cat\n2 | 9 | Bob | dog\n3 | 15 | Alice | tortoise\n'''\n)\nt2 = pw.debug.table_from_markdown(\n '''\n | age | owner\n1 | 11 | Alice\n2 | 12 | Tom\n3 | 7 | Eve\n'''\n)\nt3 = t2.filter(pw.this.age > 10)\nwith pytest.raises(\n ValueError,\n match='Universe of the argument of Table.update_cells\\(\\) needs ' # noqa\n + 'to be a subset of the universe of the updated table.',\n):\n t1.update_cells(t3)\nt1 = t1.promise_universe_is_equal_to(t2)\nresult = t1.update_cells(t3)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n11 | Alice | cat\n12 | Tom | dog\n15 | Alice | tortoise\n```\n::\n::\npromise_universe_is_subset_of(other)\nAsserts to Pathway that an universe of self is a subset of universe of each of the other.\nSemantics: Used in situations where Pathway cannot deduce one universe being a subset of another.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 30\n''').promise_universe_is_subset_of(t1)\nt3 = t1 << t2\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 
30\n```\n::\n::\npromise_universes_are_disjoint(other)\nAsserts to Pathway that an universe of self is disjoint from universe of other.\nSemantics: Used in situations where Pathway cannot deduce universes are disjoint.\n* Returns\n self\nNOTE: The assertion works in place.\nExample:\n"} -{"doc": "---\ntitle: Join API\nsidebar: 'API'\nnavigation: true\n---\n# Join API\nContains reference for helper classes related to joins.\nclass pw.JoinMode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nclass pw.JoinResult(_context, _inner_table, _columns_mapping, _left_table, _right_table, _original_left, _original_right, _substitution, _joined_on_names, _join_mode)\nResult of a join between tables.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | age | owner | pet\n1 | 10 | Alice | 1\n2 | 9 | Bob | 1\n3 | 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\n | age | owner | pet\n11 | 11 | Alice | 30\n12 | 12 | Tom | 40\n''').promise_universes_are_disjoint(t1)\nt3 = t1.concat(t2)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner | pet\n8 | Alice | 2\n9 | Bob | 1\n10 | Alice | 1\n11 | Alice | 30\n12 | Tom | 40\n```\n::\n::\n"} -{"doc": "---\ntitle: Groupby API\nsidebar: 'API'\nnavigation: true\n---\n# Groupby API\nContains reference for helper classes related to groupby.\nclass pw.GroupedJoinResult(*, join_result, args, id)\nreduce(*args, kwargs)\nReduces grouped join result to table.\n* Returns\n *Table* \u2013 Created table.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n cost owner pet\n1 100 Alice 1\n2 90 Bob 1\n3 80 Alice 2\n''')\nt2 = pw.debug.table_from_markdown('''\n cost owner pet size\n11 100 Alice 3 M\n12 90 Bob 1 L\n13 80 Tom 1 XL\n''')\nresult = (t1.join(t2, t1.owner==t2.owner).groupby(pw.this.owner)\n .reduce(pw.this.owner, pairs = pw.reducers.count()))\npw.debug.compute_and_print(result, 
include_id=False)\n```\n::\nResult\n```\nowner | pairs\nAlice | 2\nBob | 1\n```\n::\n::\nclass pw.GroupedJoinable(_universe, _substitution, _joinable)\nclass pw.GroupedTable(table, grouping_columns, set_id=False, sort_by=None, _filter_out_results_of_forgetting=False)\nResult of a groupby operation on a Table.\nExample:\n"} -{"doc": "---\ntitle: Groupby API\nsidebar: 'API'\nnavigation: true\n---\n# Groupby API\nContains reference for helper classes related to groupby.\nclass pw.GroupedJoinResult(*, join_result, args, id)\nreduce(*args, kwargs)\nReduces grouped join result to table.\n* Returns\n *Table* \u2013 Created table.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.groupby(t1.pet, t1.owner)\nisinstance(t2, pw.GroupedTable)\n```\n::\nResult\n```\nTrue\n```\n::\n::\nreduce(*args, kwargs)\nReduces grouped table to a table.\n* Parameters\n * args (`ColumnReference`) \u2013 Column references.\n * kwargs (`ColumnExpression`) \u2013 Column expressions with their new assigned names.\n* Returns\n *Table* \u2013 Created table.\nExample:\n"} -{"doc": "---\ntitle: Groupby API\nsidebar: 'API'\nnavigation: true\n---\n# Groupby API\nContains reference for helper classes related to groupby.\nclass pw.GroupedJoinResult(*, join_result, args, id)\nreduce(*args, kwargs)\nReduces grouped join result to table.\n* Returns\n *Table* \u2013 Created table.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.groupby(t1.pet, t1.owner).reduce(t1.owner, t1.pet, ageagg=pw.reducers.sum(t1.age))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | pet | ageagg\nAlice | cat | 8\nAlice | dog | 10\nBob | dog | 16\n```\n::\n::\nclass pw.ReducerExpressionSplitter()\n"} -{"doc": 
"pathway.stdlib.utils.pandas_transformer module\npw.utils.pandas_transformer.pandas_transformer(output_schema, output_universe=None)\nDecorator that turns python function operating on pandas.DataFrame into pathway transformer.\nInput universes are converted into input DataFrame indexes.\nThe resulting index is treated as the output universe, so it must maintain uniqueness\nand be of integer type.\n* Parameters\n * output_schema (`type`\\[`Schema`\\]) \u2013 Schema of a resulting table.\n * output_universe (`UnionType`\\[`str`, `int`, `None`\\]) \u2013 Index or name of an argument whose universe will be used in resulting table. Defaults to None.\n* Returns\n Transformer that can be applied on Pathway tables.\nExample:\nCode\n```python\nimport pathway as pw\ninput = pw.debug.table_from_markdown(\n '''\n | foo | bar\n0 | 10 | 100\n1 | 20 | 200\n2 | 30 | 300\n'''\n)\nclass Output(pw.Schema):\n sum: int\n@pw.pandas_transformer(output_schema=Output)\ndef sum_cols(t: pd.DataFrame) -> pd.DataFrame:\n return pd.DataFrame(t.sum(axis=1))\noutput = sum_cols(input)\npw.debug.compute_and_print(output, include_id=False)\n```\n::\nResult\n```\nsum\n110\n220\n330\n```\n::\n::\n"} -{"doc": "pathway.stdlib.utils.col module\nFunctions\npw.utils.col.apply_all_rows(*cols, fun, result_col_name)\nApplies a function to all the data in selected columns at once, returning a single column.\nThis transformer is meant to be run infrequently on a relativelly small tables.\nInput:\n- cols: list of columns to which function will be applied\n- fun: function taking lists of columns and returning a corresponding list of outputs.\n- result_col_name: name of the output column\nOutput:\n- Table indexed with original indices with a single column named by \u201cresult_col_name\u201d argument\ncontaining results of the apply\nExample:\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n'''\n | colA | colB\n1 | 1 | 10\n2 | 2 | 20\n3 | 3 | 30\n''')\ndef add_total_sum(col1, col2):\n 
sum_all = sum(col1) + sum(col2)\n return [x + sum_all for x in col1]\nresult = pw.utils.col.apply_all_rows(\n table.colA, table.colB, fun=add_total_sum, result_col_name=\"res\"\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nres\n67\n68\n69\n```\n::\n::\npw.utils.col.flatten_column(column, origin_id=.origin_id)\nDeprecated: use pw.Table.flatten instead.\nFlattens a column of a table.\nInput:\n- column: Column expression of column to be flattened\n- origin_id: name of output column where to store id\u2019s of input rows\nOutput:\n- Table with columns: colname_to_flatten and origin_id (if not None)\n"} -{"doc": "pathway.stdlib.utils.async_transformer module\nclass pw.utils.async_transformer.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\nCode\n```python\nimport pathway as pw\nimport asyncio\nclass OutputSchema(pw.Schema):\n ret: int\nclass AsyncIncrementTransformer(pw.AsyncTransformer, output_schema=OutputSchema):\n async def invoke(self, value) -> Dict[str, Any]:\n await asyncio.sleep(0.1)\n return {\"ret\": value + 1 }\ninput = pw.debug.table_from_markdown('''\n | value\n1 | 42\n2 | 44\n''')\nresult = AsyncIncrementTransformer(input_table=input).result\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nret\n43\n45\n```\n::\n::\nclose()\nCalled once at the end. Proper place for cleanup.\nabstract async invoke(*args, kwargs)\nCalled for every row of input_table. The arguments will correspond to the\ncolumns in the input table.\nShould return dict of values matching `output_schema`.\nopen()\nCalled before actual work. 
Suitable for one time setup.\nproperty result(: Table )\nResulting table.\nwith_options(capacity=None, retry_strategy=None, cache_strategy=None)\nSets async options.\n* Parameters\n * capacity (`Optional`\\[`int`\\]) \u2013 maximum number of concurrent operations.\n * retry_strategy (`Optional`\\[`AsyncRetryStrategy`\\]) \u2013 defines how failures will be handled.\n* Returns\n self\n"} -{"doc": "Example\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | pet | age\n1 | Dog | 2\n7 | Cat | 5\n''')\nt2 = pw.utils.col.flatten_column(t1.pet)\npw.debug.compute_and_print(t2.without(pw.this.origin_id), include_id=False)\n```\n::\nResult\n```\npet\nC\nD\na\ng\no\nt\n```\n::\n::\npw.utils.col.groupby_reduce_majority(column_group, column_val)\nFinds a majority in column_val for every group in column_group.\nWorkaround for missing majority reducer.\nExample:\n"} -{"doc": "Example\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n'''\n | group | vote\n0 | 1 | pizza\n1 | 1 | pizza\n2 | 1 | hotdog\n3 | 2 | hotdog\n4 | 2 | pasta\n5 | 2 | pasta\n6 | 2 | pasta\n''')\nresult = pw.utils.col.groupby_reduce_majority(table.group, table.vote)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\ngroup | majority\n1 | pizza\n2 | pasta\n```\n::\n::\npw.utils.col.multiapply_all_rows(*cols, fun, result_col_names)\nApplies a function to all the data in selected columns at once, returning multiple columns.\nThis transformer is meant to be run infrequently on a relativelly small tables.\nInput:\n- cols: list of columns to which function will be applied\n- fun: function taking lists of columns and returning a corresponding list of outputs.\n- result_col_names: names of the output columns\nOutput:\n- Table indexed with original indices with columns named by \u201cresult_col_names\u201d argument\ncontaining results of the apply\nExample:\n"} -{"doc": "Example\n```python\nimport pathway as pw\ntable = 
pw.debug.table_from_markdown(\n'''\n | colA | colB\n1 | 1 | 10\n2 | 2 | 20\n3 | 3 | 30\n''')\ndef add_total_sum(col1, col2):\n sum_all = sum(col1) + sum(col2)\n return [x + sum_all for x in col1], [x + sum_all for x in col2]\nresult = pw.utils.col.multiapply_all_rows(\n table.colA, table.colB, fun=add_total_sum, result_col_names=[\"res1\", \"res2\"]\n)\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nres1 | res2\n67 | 76\n68 | 86\n69 | 96\n```\n::\n::\npw.utils.col.unpack_col(column, *unpacked_columns, schema=None)\nUnpacks multiple columns from a single column.\nArguments unpacked_columns and schema are mutually exclusive\nInput:\n- column: Column expression of column containing some sequences\n- unpacked_columns: list of names of output columns\n- schema: Schema of new columns\nOutput:\n- Table with columns named by \u201cunpacked_columns\u201d argument\nExamples:\n"} -{"doc": "Example\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n'''\n | colA | colB | colC\n1 | Alice | 25 | dog\n2 | Bob | 32 | cat\n3 | Carole | 28 | dog\n''')\nt2 = t1.select(user = pw.make_tuple(pw.this.colA, pw.this.colB, pw.this.colC))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nuser\n('Alice', 25, 'dog')\n('Bob', 32, 'cat')\n('Carole', 28, 'dog')\n```\n::\n::\n"} -{"doc": "Example\n```python\nclass SomeSchema(pw.Schema):\n name: str\n age: int\n pet: str\nunpack_table = pw.utils.col.unpack_col(t2.user, schema=SomeSchema)\npw.debug.compute_and_print(unpack_table, include_id=False)\n```\n::\nResult\n```\nname | age | pet\nAlice | 25 | dog\nBob | 32 | cat\nCarole | 28 | dog\n```\n::\n::"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of 
an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport asyncio\nclass OutputSchema(pw.Schema):\n ret: int\nclass AsyncIncrementTransformer(pw.AsyncTransformer, output_schema=OutputSchema):\n async def invoke(self, value) -> Dict[str, Any]:\n await asyncio.sleep(0.1)\n return {\"ret\": value + 1 }\ninput = pw.debug.table_from_markdown('''\n | value\n1 | 42\n2 | 44\n''')\nresult = AsyncIncrementTransformer(input_table=input).result\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nret\n43\n45\n```\n::\n::\nclose()\nCalled once at the end. Proper place for cleanup.\nabstract async invoke(*args, kwargs)\nCalled for every row of input_table. The arguments will correspond to the\ncolumns in the input table.\nShould return dict of values matching `output_schema`.\nopen()\nCalled before actual work. Suitable for one time setup.\nwith_options(capacity=None, retry_strategy=None, cache_strategy=None)\nSets async options.\n* Parameters\n * capacity (`Optional`\\[`int`\\]) \u2013 maximum number of concurrent operations.\n * retry_strategy (`Optional`\\[`AsyncRetryStrategy`\\]) \u2013 defines how failures will be handled.\n* Returns\n self\nproperty result(: Table )\nResulting table.\nclass pw.BaseCustomAccumulator()\nUtility class for defining custom accumulators, used for custom reducers.\nCustom accumulators should inherit from this class, and should implement from_row,\nupdate and compute_result. 
Optionally neutral and retract can be provided\nfor more efficient processing on streams with changing data.\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nclass CustomAvgAccumulator(pw.BaseCustomAccumulator):\n def __init__(self, sum, cnt):\n self.sum = sum\n self.cnt = cnt\n @classmethod\n def from_row(self, row):\n [val] = row\n return CustomAvgAccumulator(val, 1)\n def update(self, other):\n self.sum += other.sum\n self.cnt += other.cnt\n def compute_result(self) -> float:\n return self.sum / self.cnt\nimport sys; sys.modules[__name__].CustomAvgAccumulator = CustomAvgAccumulator # NOSHOW\ncustom_avg = pw.reducers.udf_reducer(CustomAvgAccumulator)\nt1 = pw.debug.parse_to_table('''\nage | owner | pet | price\n10 | Alice | dog | 100\n9 | Bob | cat | 80\n8 | Alice | cat | 90\n7 | Bob | dog | 70\n''')\nt2 = t1.groupby(t1.owner).reduce(t1.owner, avg_price=custom_avg(t1.price))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | avg_price\nAlice | 95.0\nBob | 75.0\n```\n::\n::\nabstract compute_result()\nMandatory function to finalize computation.\nUsed to extract answer from final state of accumulator.\nNarrowing the type of this function helps better type the output of the reducer.\nabstract classmethod from_row(row)\nConstruct the accumulator from a row of data.\nRow will be passed as a list of values.\nThis is a mandatory function.\nclassmethod neutral()\nNeutral element of the accumulator (aggregation of an empty list).\nThis function is optional, and allows for more efficient processing on streams\nwith changing data.\nretract(other)\nUpdate the 
accumulator by removing the value of another one.\nThis function is optional, and allows more efficient reductions on streams\nwith changing data.\nabstract update(other)\nUpdate the accumulator with another one.\nMethod does not need to return anything, the change should be in-place.\nThis is a mandatory function.\nclass pw.ClassArg(ref: RowReference, ptr: Pointer)\nBase class to inherit from when writing inner classes for class transformers.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n arg = pw.input_attribute()\n @pw.output_attribute\n def ret(self) -> int:\n return self.arg + 1\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7\n''')\nt2 = simple_transformer(table=t1.select(arg=t1.age)).table\npw.debug.compute_and_print(t1 + t2, include_id=False)\n```\n::\nResult\n```\nage | ret\n7 | 8\n8 | 9\n9 | 10\n10 | 11\n```\n::\n::\npointer_from(*args, optional=False)\nPseudo-random hash of its argument. Produces pointer types. 
Applied value-wise.\nclass pw.ColumnExpression()\nas_bool()\nConverts value to a bool or None if not possible.\nCurrently works for Json columns only.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\nclass InputSchema(pw.Schema):\n data: dict\ndt = pd.DataFrame(data={\"data\": [{\"value\": True}, {\"value\": False}]})\ntable = pw.debug.table_from_pandas(dt, schema=InputSchema)\nresult = table.select(result=pw.this.data.get(\"value\").as_bool())\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nresult\nFalse\nTrue\n```\n::\n::\nas_float()\nConverts value to a float or None if not possible.\nCurrently works for Json columns only.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\nclass InputSchema(pw.Schema):\n data: dict\ndt = pd.DataFrame(data={\"data\": [{\"value\": 1.5}, {\"value\": 3.14}]})\ntable = pw.debug.table_from_pandas(dt, schema=InputSchema)\nresult = table.select(result=pw.this.data.get(\"value\").as_float())\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nresult\n1.5\n3.14\n```\n::\n::\nas_int()\nConverts value to an int or None if not possible.\nCurrently works for 
Json columns only.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\nclass InputSchema(pw.Schema):\n data: dict\ndt = pd.DataFrame(data={\"data\": [{\"value\": 1}, {\"value\": 2}]})\ntable = pw.debug.table_from_pandas(dt, schema=InputSchema)\nresult = table.select(result=pw.this.data.get(\"value\").as_int())\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nresult\n1\n2\n```\n::\n::\nas_str()\nConverts value to a string or None if not possible.\nCurrently works for Json columns only.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport pandas as pd\nclass InputSchema(pw.Schema):\n data: dict\ndt = pd.DataFrame(data={\"data\": [{\"value\": \"dog\"}, {\"value\": \"cat\"}]})\ntable = pw.debug.table_from_pandas(dt, schema=InputSchema)\nresult = table.select(result=pw.this.data.get(\"value\").as_str())\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\nresult\ncat\ndog\n```\n::\n::\nget(index, default=None)\nExtracts element at index from an object. 
The object has to be a Tuple or Json.\nIf no element is present at index, it returns value specified by a default parameter.\nIndex can be effectively int for Tuple and int or str for Json.\nFor Tuples, using negative index can be used to access elements at the end, moving backwards.\n* Parameters\n * index (`ColumnExpression` | `int` | `str`) \u2013 Position to extract element at.\n * default (`Union`\\[`ColumnExpression`, `None`, `int`, `float`, `str`, `bytes`, `bool`, `Pointer`, `datetime`, `timedelta`, `ndarray`, `Json`, `dict`\\[`str`, `Any`\\], `tuple`\\[`Any`, `...`\\]\\]) \u2013 Value returned when no element is at position index. Defaults to None.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown(\n '''\n | a | b | c\n1 | 3 | 2 | 2\n2 | 4 | 1 | 0\n3 | 7 | 3 | 1\n'''\n)\nt2 = t1.with_columns(tup=pw.make_tuple(pw.this.a, pw.this.b))\nt3 = t2.select(\n x=pw.this.tup.get(1),\n y=pw.this.tup.get(3),\n z=pw.this.tup.get(pw.this.c),\n t=pw.this.tup.get(pw.this.c, default=100),\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nx | y | z | t\n1 | | 4 | 4\n2 | | | 100\n3 | | 3 | 3\n```\n::\n::\nis_none()\nReturns true if the value is None.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table 
can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | owner | pet\n1 | Alice | dog\n2 | Bob |\n3 | Carol | cat\n''')\nt2 = t1.with_columns(has_no_pet=pw.this.pet.is_none())\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | pet | has_no_pet\nAlice | dog | False\nBob | | True\nCarol | cat | False\n```\n::\n::\nis_not_none()\nReturns true if the value is not None.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n | owner | pet\n1 | Alice | dog\n2 | Bob |\n3 | Carol | cat\n''')\nt2 = t1.with_columns(has_pet=pw.this.pet.is_not_none())\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | pet | has_pet\nAlice | dog | True\nBob | | False\nCarol | cat | True\n```\n::\n::\nto_string()\nChanges the values to strings.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nval\n1\n2\n3\n4''')\nt1.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table 
API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t1, include_id=False)\n```\n::\nResult\n```\nval\n1\n2\n3\n4\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt2 = t1.select(val = pw.this.val.to_string())\nt2.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t2.select(val=pw.this.val + \"a\"), include_id=False)\n```\n::\nResult\n```\nval\n1a\n2a\n3a\n4a\n```\n::\n::\nclass pw.ColumnReference(column, table, name)\nReference to the column.\nInherits from ColumnExpression.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = 
pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob dog''')\nisinstance(t1.age, pw.ColumnReference)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nisinstance(t1[\"owner\"], pw.ColumnReference)\n```\n::\nResult\n```\nTrue\n```\n::\n::\nproperty name()\nName of the referred column.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob dog''')\nt1.age.name\n```\n::\nResult\n```\n'age'\n```\n::\n::\nproperty table()\nTable where the referred column belongs to.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob 
dog''')\nt1.age.table is t1\n```\n::\nResult\n```\nTrue\n```\n::\n::\nclass pw.DateTimeNaive(ts_input=, year=None, month=None, day=None, hour=None, minute=None, second=None, microsecond=None, tzinfo=None, *, nanosecond=None, tz=None, unit=None, fold=None)\nclass pw.DateTimeUtc(ts_input=, year=None, month=None, day=None, hour=None, minute=None, second=None, microsecond=None, tzinfo=None, *, nanosecond=None, tz=None, unit=None, fold=None)\nclass pw.Duration(value=, unit=None, kwargs)\nclass pw.Json(_value)\nRepresents JSON values.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\na | b | c\nTrue | 2 | manul\n''')\n@pw.udf\ndef to_json(val) -> pw.Json:\n return pw.Json(val)\nresult = t1.select({c: to_json(pw.this[c]) for c in t1.column_names()})\npw.debug.compute_and_print(result, include_id=False)\n```\n::\nResult\n```\na | b | c\ntrue | 2 | \"manul\"\n```\n::\n::\nclass pw.MonitoringLevel(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)\nSpecifies a verbosity of Pathway monitoring mechanism.\nALL( = 4 )\nMonitor input connectors and latency for each operator in the execution graph. 
The\nlatency is measured as the difference between the time when the operator processed\nthe data and the time when pathway acquired the data.\nAUTO( = 0 )\nAutomatically sets IN_OUT in an interactive terminal and jupyter notebook.\nSets NONE otherwise.\nAUTO_ALL( = 1 )\nAutomatically sets ALL in an interactive terminal and jupyter notebook.\nSets NONE otherwise.\nIN_OUT( = 3 )\nMonitor input connectors and input and output latency. The latency is measured as\nthe difference between the time when the operator processed the data and the time\nwhen pathway acquired the data.\nNONE( = 2 )\nNo monitoring.\nclass pw.Schema()\nBase class to inherit from when creating schemas.\nAll schemas should be subclasses of this one.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob dog''')\nt1.schema\n```\n::\nResult\n```\n, 'owner': , 'pet': }>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nissubclass(t1.schema, pw.Schema)\n```\n::\nResult\n```\nTrue\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and 
functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nclass NewSchema(pw.Schema):\n foo: int\nSchemaSum = NewSchema | t1.schema\nSchemaSum\n```\n::\nResult\n```\n, 'owner': , 'pet': , 'foo': }>\n```\n::\n::\nclass pw.SchemaProperties(append_only=None)\nclass pw.TableSlice(mapping, table)\nCollection of references to Table columns.\nCreated by Table.slice method, or automatically by using left/right/this constructs.\nSupports basic column manipulation methods.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt1.slice.without(\"age\").with_suffix(\"_col\")\n```\n::\nResult\n```\nTableSlice({'owner_col': .owner, 'pet_col': .pet})\n```\n::\n::\nclass pw.iterate_universe(table)\nclass pw.left(*args, kwargs)\nObject for generating column references without holding the actual table in hand.\nNeeds to be evaluated in the proper context.\nFor Table.join() and JoinResult.select(), refers to the left input table.\nFor all other situations, you need pw.this object.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async 
transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, pw.left.pet == pw.right.pet, pw.left.owner == pw.right.owner).select(\n age=pw.left.age, owner_name=pw.right.owner, size=pw.this.size\n )\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\nclass pw.right(*args, kwargs)\nObject for generating column references without holding the actual table in hand.\nNeeds to be evaluated in the proper context.\nFor Table.join() and JoinResult.select(), refers to the right input table.\nFor all other situations, you need pw.this object.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n 10 | Alice | 1\n 9 | Bob | 1\n 8 | Alice | 2\n''')\nt2 = pw.debug.table_from_markdown('''\nage | owner | pet | size\n 10 | Alice | 3 | M\n 9 | Bob | 1 | L\n 8 | Tom | 1 | XL\n''')\nt3 = t1.join(t2, pw.left.pet == pw.right.pet, pw.left.owner == pw.right.owner).select(\n age=pw.left.age, owner_name=pw.right.owner, size=pw.this.size\n )\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\nage | owner_name | size\n9 | Bob | L\n```\n::\n::\nclass pw.this(*args, 
kwargs)\nObject for generating column references without holding the actual table in hand.\nNeeds to be evaluated in the proper context.\nFor most of the Table methods, it refers to self.\nFor JoinResult, it refers to the left input table.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | 1\n9 | Bob | 1\n8 | Alice | 2\n''')\nt2 = t1.select(pw.this.owner, pw.this.age)\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nowner | age\nAlice | 8\nAlice | 10\nBob | 9\n```\n::\n::\nFunctions\npw.apply(fun, *args, kwargs)\nApplies function to column expressions, column-wise.\nOutput column type deduced from type-annotations of a function.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\ndef concat(left: str, right: str) -> str:\n return left+right\nt1 = pw.debug.table_from_markdown('''\nage owner pet\n 10 Alice dog\n 9 Bob dog\n 8 Alice cat\n 7 Bob dog''')\nt2 = t1.select(col = pw.apply(concat, t1.owner, t1.pet))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncol\nAlicecat\nAlicedog\nBobdog\nBobdog\n```\n::\n::\npw.apply_async(fun, *args, kwargs)\nApplies function 
asynchronously to column expressions, column-wise.\nOutput column type deduced from type-annotations of a function.\nEither a regular or async function can be passed.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport asyncio\nasync def concat(left: str, right: str) -> str:\n await asyncio.sleep(0.1)\n return left+right\nt1 = pw.debug.table_from_markdown('''\nage owner pet\n 10 Alice dog\n 9 Bob dog\n 8 Alice cat\n 7 Bob dog''')\nt2 = t1.select(col = pw.apply_async(concat, t1.owner, t1.pet))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncol\nAlicecat\nAlicedog\nBobdog\nBobdog\n```\n::\n::\npw.apply_with_type(fun, ret_type, *args, kwargs)\nApplies function to column expressions, column-wise.\nOutput column type is provided explicitly.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n age owner pet\n1 10 Alice dog\n2 9 Bob dog\n3 8 Alice cat\n4 7 Bob dog''')\nt2 = t1.select(col = pw.apply_with_type(lambda left, right: left+right, str, t1.owner, t1.pet))\npw.debug.compute_and_print(t2, 
include_id=False)\n```\n::\nResult\n```\ncol\nAlicecat\nAlicedog\nBobdog\nBobdog\n```\n::\n::\npw.assert_table_has_schema(table, schema, *, allow_superset=True, ignore_primary_keys=True)\nAsserts that the schema of the table is equivalent to the schema given as an argument.\n* Parameters\n * table (`Table`) \u2013 Table for which we are asserting schema.\n * schema (`type`\\[`Schema`\\]) \u2013 Schema, which we assert that the Table has.\n * allow_superset (`bool`) \u2013 if True, the columns of the table can be a superset of columns\n in schema. The default value is True.\n * ignore_primary_keys (`bool`) \u2013 if True, the assert won\u2019t check whether table and schema\n have the same primary keys. The default value is True.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | dog\n8 | Alice | cat\n7 | Bob | dog\n''')\nt2 = t1.select(pw.this.owner, age = pw.cast(float, pw.this.age))\nschema = pw.schema_builder(\n {\"age\": pw.column_definition(dtype=float), \"owner\": pw.column_definition(dtype=str)}\n)\npw.assert_table_has_schema(t2, schema)\n```\npw.attribute(func, kwargs)\nDecorator for creation of attributes.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n arg = pw.input_attribute()\n @pw.attribute\n def attr(self) -> float:\n return self.arg*2\n @pw.output_attribute\n def ret(self) -> float:\n return self.attr + 1\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = 
simple_transformer(table=t1.select(arg=t1.age)).table\npw.debug.compute_and_print(t1 + t2, include_id=False)\n```\n::\nResult\n```\nage | ret\n7 | 15\n8 | 17\n9 | 19\n10 | 21\n```\n::\n::\npw.cast(target_type, col)\nChanges the type of the column to target_type and converts the data of this column\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n val\n1 10\n2 9\n3 8\n4 7''')\nt1.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t1, include_id=False)\n```\n::\nResult\n```\nval\n7\n8\n9\n10\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt2 = t1.select(val = pw.cast(float, t1.val))\nt2.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway 
API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nval\n7.0\n8.0\n9.0\n10.0\n```\n::\n::\npw.coalesce(*args)\nFor arguments list arg_1, arg_2, \u2026, arg_n returns first not-None value.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\ncolA colB\n | 10\n 2 |\n |\n 4 | 7''')\nt2 = t1.select(t1.colA, t1.colB, col=pw.coalesce(t1.colA, t1.colB))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncolA | colB | col\n | |\n | 10 | 10\n2 | | 2\n4 | 7 | 4\n```\n::\n::\npw.column_definition(*, primary_key=False, default_value=undefined, dtype=None, name=None, append_only=None)\nCreates column definition\n* Parameters\n * primary_key (`bool`) \u2013 should column be a part of a primary key.\n * default_value (`Optional`\\[`Any`\\]) \u2013 default value replacing blank entries. The default value of the\n column must be specified explicitly,\n otherwise there will be no default value.\n * dtype (`Optional`\\[`Any`\\]) \u2013 data type. When used in schema class,\n will be deduced from the type annotation.\n * name (`Optional`\\[`str`\\]) \u2013 name of a column. 
When used in schema class,\n will be deduced from the attribute name.\n * append_only (`Optional`\\[`bool`\\]) \u2013 whether column is append-only. if unspecified, defaults to False\n or to value specified at the schema definition level\n* Returns\n Column definition.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nclass NewSchema(pw.Schema):\n key: int = pw.column_definition(primary_key=True)\n timestamp: str = pw.column_definition(name=\"@timestamp\")\n data: str\nNewSchema\n```\n::\nResult\n```\n, '@timestamp': , 'data': }>\n```\n::\n::\npw.declare_type(target_type, col)\nUsed to change the type of a column to a particular type.\nDisclaimer: it only changes type in a schema, it does not affect values stored.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n val\n1 10\n2 9.5\n3 8\n4 7''')\nt1.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be 
called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt2 = t1.filter(t1.val == pw.cast(int, t1.val))\nt2.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt3 = t2.select(val = pw.declare_type(int, t2.val))\nt3.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\npw.if_else(if_clause, then_clause, else_clause)\nEquivalent to:\n```default\nif (if_clause):\n return (then_clause)\nelse:\n return (else_clause)\n```\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\ncolA colB\n 1 | 0\n 2 | 2\n 6 | 3''')\nt2 = t1.select(res = pw.if_else(t1.colB != 0, t1.colA // t1.colB, 0))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\nres\n0\n1\n2\n```\n::\n::\npw.input_attribute(type=)\nReturns new input_attribute. 
To be used inside class transformers.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n arg = pw.input_attribute()\n @pw.output_attribute\n def ret(self) -> float:\n return self.arg + 1\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = simple_transformer(table=t1.select(arg=t1.age)).table\npw.debug.compute_and_print(t1 + t2, include_id=False)\n```\n::\nResult\n```\nage | ret\n7 | 8\n8 | 9\n9 | 10\n10 | 11\n```\n::\n::\npw.input_method(type=)\nDecorator for defining input methods in class transformers.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass first_transformer:\n class table(pw.ClassArg):\n a: float = pw.input_attribute()\n @pw.method\n def fun(self, arg) -> int:\n return self.a * arg\n@pw.transformer\nclass second_transformer:\n class table(pw.ClassArg):\n m = pw.input_method(int)\n @pw.output_attribute\n def val(self):\n return self.m(2)\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = first_transformer(table=t1.select(a=t1.age)).table\nt2.schema\n```\n::\nResult\n```\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 
'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt3 = second_transformer(table=t2.select(m=t2.fun)).table\npw.debug.compute_and_print(t1 + t3, include_id=False)\n```\n::\nResult\n```\nage | val\n7 | 14\n8 | 16\n9 | 18\n10 | 20\n```\n::\n::\npw.iterate(func, iteration_limit=None, kwargs)\nIterate function until fixed point.\nFunction has to take only named arguments, Tables, and return a dict of Tables.\nInitial arguments to function are passed through kwargs.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\ndef collatz_transformer(iterated):\n def collatz_step(x: int) -> int:\n if x == 1:\n return 1\n elif x % 2 == 0:\n return x / 2\n else:\n return 3 * x + 1\n new_iterated = iterated.select(val=pw.apply(collatz_step, iterated.val))\n return dict(iterated=new_iterated)\ntab = pw.debug.table_from_markdown('''\nval\n 1\n 2\n 3\n 4\n 5\n 6\n 7\n 8''')\nret = pw.iterate(collatz_transformer, iterated=tab).iterated\npw.debug.compute_and_print(ret, include_id=False)\n```\n::\nResult\n```\nval\n1\n1\n1\n1\n1\n1\n1\n1\n```\n::\n::\npw.make_tuple(*args)\nCreates a tuple from the provided expressions.\n* Parameters\n args (`Union`\\[`ColumnExpression`, `None`, `int`, `float`, `str`, `bytes`, `bool`, `Pointer`, `datetime`, `timedelta`, `ndarray`, `Json`, 
`dict`\\[`str`, `Any`\\], `tuple`\\[`Any`, `...`\\]\\]) \u2013 a list of expressions to be put in a tuple\n* Returns\n tuple\nNOTE: * Each cell in the output column will be a tuple containing the corresponding values from the input columns.\n* The order of values in each tuple will match the order of the input columns.\n* If any of the input columns have missing values, the resulting tuples will contain None for those positions.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\na | b | c\n1 | 10 | a\n2 | 20 |\n3 | 30 | c\n'''\n)\ntable_with_tuple = table.select(res=pw.make_tuple(pw.this.a, pw.this.b, pw.this.c))\npw.debug.compute_and_print(table_with_tuple, include_id=False)\n```\n::\nResult\n```\nres\n(1, 10, 'a')\n(2, 20, None)\n(3, 30, 'c')\n```\n::\n::\npw.method(func, kwargs)\nDecorator for creation methods in class transformers.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n a: float = pw.input_attribute()\n @pw.output_attribute\n def b(self) -> float:\n return self.fun(self.a)\n @method\n def fun(self, arg) -> float:\n return self.a * arg\nt1 = 
pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = simple_transformer(table=t1.select(a=t1.age)).table\nt2.schema\n```\n::\nResult\n```\n, 'fun': typing.Callable[..., float]}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t1 + t2.select(t2.b), include_id=False)\n```\n::\nResult\n```\nage | b\n7 | 49\n8 | 64\n9 | 81\n10 | 100\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t1 + t2.select(out = t2.fun(t2.b)), include_id=False)\n```\n::\nResult\n```\nage | out\n7 | 343\n8 | 512\n9 | 729\n10 | 1000\n```\n::\n::\npw.numba_apply(fun, numba_signature, *args, kwargs)\nApplies function to column expressions, column-wise.\nFunction has to be numba compilable.\nCurrently only a few signatures are supported:\n- function has to be unary or binary\n- arguments and return type has to be either int64 or float64\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an 
input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\n val\n1 1\n2 3\n3 5\n4 7''')\nt2 = t1.select(col = pw.numba_apply(lambda x: x*x-2*x+1, \"int64(int64,)\", t1.val))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncol\n0\n4\n16\n36\n```\n::\n::\npw.output_attribute(func, kwargs)\nDecorator for creation of output_attributes.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n arg = pw.input_attribute()\n @pw.output_attribute\n def ret(self) -> float:\n return self.arg + 1\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = simple_transformer(table=t1.select(arg=t1.age)).table\npw.debug.compute_and_print(t1 + t2, include_id=False)\n```\n::\nResult\n```\nage | ret\n7 | 8\n8 | 9\n9 | 10\n10 | 11\n```\n::\n::\npw.pandas_transformer(output_schema, output_universe=None)\nDecorator that turns python function operating on pandas.DataFrame into pathway transformer.\nInput universes are converted into input DataFrame indexes.\nThe resulting index is treated as the output universe, so it must maintain uniqueness\nand be of integer type.\n* Parameters\n * output_schema (`type`\\[`Schema`\\]) \u2013 Schema of a resulting table.\n * output_universe (`UnionType`\\[`str`, `int`, `None`\\]) \u2013 Index or name of an argument whose universe will be used in resulting table. 
Defaults to None.\n* Returns\n Transformer that can be applied on Pathway tables.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\ninput = pw.debug.table_from_markdown(\n '''\n | foo | bar\n0 | 10 | 100\n1 | 20 | 200\n2 | 30 | 300\n'''\n)\nclass Output(pw.Schema):\n sum: int\n@pw.pandas_transformer(output_schema=Output)\ndef sum_cols(t: pd.DataFrame) -> pd.DataFrame:\n return pd.DataFrame(t.sum(axis=1))\noutput = sum_cols(input)\npw.debug.compute_and_print(output, include_id=False)\n```\n::\nResult\n```\nsum\n110\n220\n330\n```\n::\n::\npw.require(val, *deps)\nReturns val iff every dep in deps is not-None.\nReturns None otherwise.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\ncolA colB\n | 10\n 2 |\n |\n 4 | 7''')\nt2 = t1.select(t1.colA, t1.colB, col=pw.require(t1.colA + t1.colB, t1.colA, t1.colB))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncolA | colB | col\n | |\n | 10 |\n2 | |\n4 | 7 | 11\n```\n::\n::\npw.run(debug=False, monitoring_level=MonitoringLevel.AUTO, with_http_server=False, default_logging=True, persistence_config=None)\nRuns the computation graph.\n* Parameters\n * debug (`bool`) \u2013 
enable output out of table.debug() operators\n * monitoring_level (`MonitoringLevel`) \u2013 the verbosity of stats monitoring mechanism. One of\n pathway.MonitoringLevel.NONE, pathway.MonitoringLevel.IN_OUT,\n pathway.MonitoringLevel.ALL. If unset, pathway will choose between\n NONE and IN_OUT based on output interactivity.\n * with_http_server (`bool`) \u2013 whether to start a http server with runtime metrics. Learn\n more in a tutorial .\n * default_logging (`bool`) \u2013 whether to allow pathway to set its own logging handler. Set\n it to False if you want to set your own logging handler.\n * persistence_config (`Optional`\\[`Config`\\]) \u2013 the config for persisting the state in case this\n persistence is required.\npw.schema_builder(columns, *, name=None, properties=SchemaProperties(append_only=None))\nAllows to build schema inline, from a dictionary of column definitions.\n* Parameters\n * columns (`dict`\\[`str`, `ColumnDefinition`\\]) \u2013 dictionary of column definitions.\n * name (`Optional`\\[`str`\\]) \u2013 schema name.\n * properties (`SchemaProperties`) \u2013 schema properties.\n* Returns\n Schema\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\npw.schema_builder(columns={\n 'key': pw.column_definition(dtype=int, primary_key=True),\n 'data': pw.column_definition(dtype=int, default_value=0)\n}, name=\"my_schema\")\n```\n::\nResult\n```\n, 'data': }>\n```\n::\n::\npw.schema_from_csv(path, *, name=None, properties=SchemaProperties(append_only=None), delimiter=',', quote='\"', comment_character=None, escape=None, double_quote_escapes=True, 
num_parsed_rows=None)\nAllows to generate schema based on a CSV file.\nThe names of the columns are taken from the header of the CSV file.\nTypes of columns are inferred from the values, by checking if they can be parsed.\nCurrently supported types are str, int and float.\n* Parameters\n * path (`str`) \u2013 path to the CSV file.\n * name (`Optional`\\[`str`\\]) \u2013 schema name.\n * properties (`SchemaProperties`) \u2013 schema properties.\n * delimiter (`str`) \u2013 delimiter used in CSV file. Defaults to \u201c,\u201d.\n * quote (`str`) \u2013 quote character used in CSV file. Defaults to \u2018\u201d\u2019.\n * comment_character (`Optional`\\[`str`\\]) \u2013 character used in CSV file to denote comments.\n Defaults to None\n * escape (`Optional`\\[`str`\\]) \u2013 escape character used in CSV file. Defaults to None.\n * double_quote_escapes (`bool`) \u2013 enable escapes of double quotes. Defaults to True.\n * num_parsed_rows (`Optional`\\[`int`\\]) \u2013 number of rows, which will be parsed when inferring types. When\n set to None, all rows will be parsed. When set to 0, types of all columns\n will be set to str. Defaults to None.\n* Returns\n Schema\npw.schema_from_dict(columns, *, name=None, properties=SchemaProperties(append_only=None))\nAllows to build schema inline, from a dictionary of column definitions.\nCompared to pw.schema_builder, this one uses simpler structure of the dictionary,\nwhich allows it to be loaded from JSON file.\n* Parameters\n * columns (`dict`) \u2013 dictionary of column definitions. 
The keys in this dictionary are names\n of the columns, and the values are either:\n - type of the column\n - dictionary with keys: \u201cdtype\u201d, \u201cprimary_key\u201d, \u201cdefault_value\u201d and values,\n respectively, type of the column, whether it is a primary key, and column\u2019s\n default value.\n The type can be given both by python class, or string with class name - that\n is both int and \u201cint\u201d are accepted.\n * name (`Optional`\\[`str`\\]) \u2013 schema name.\n * properties (`dict` | `SchemaProperties`) \u2013 schema properties, given either as instance of SchemaProperties class\n or a dict specifying arguments of SchemaProperties class.\n* Returns\n Schema\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\npw.schema_from_dict(columns={\n 'key': {\"dtype\": \"int\", \"primary_key\": True},\n 'data': {\"dtype\": \"int\", \"default_value\": 0}\n}, name=\"my_schema\")\n```\n::\nResult\n```\n, 'data': }>\n```\n::\n::\npw.schema_from_types(_name=None, kwargs)\nConstructs schema from kwargs: field=type.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\ns = pw.schema_from_types(foo=int, bar=str)\ns\n```\n::\nResult\n```\n, 'bar': }>\n```\n::\n::\n"} 
-{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nissubclass(s, pw.Schema)\n```\n::\nResult\n```\nTrue\n```\n::\n::\npw.sql(query, kwargs)\nRun a SQL query on Pathway tables.\n* Parameters\n * query (`str`) \u2013 the SQL query to execute.\n * kwargs (`Table`) \u2013 the association name: table used for the execution of the SQL query. Each name:table pair links a Pathway table to a table name used in the SQL query.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n \"\"\"\n A | B\n 1 | 2\n 4 | 3\n 4 | 7\n \"\"\"\n)\nret = pw.sql(\"SELECT * FROM tab WHERE A\npw.table_transformer(func=None, *, allow_superset=True, ignore_primary_keys=True, locals=None)\nDecorator for marking that a function performs operations on Tables. As a consequence,\narguments and return value, which are annotated to have type pw.Table\\[S\\]\nwill be checked whether they indeed have schema S.\n* Parameters\n * allow_superset (`Union`\\[`bool`, `Mapping`\\[`str`, `bool`\\]\\]) \u2013 if True, the columns of the table can be a superset of columns\n in schema. 
Can be given either as a bool, and this value is then used for\n all tables, or for each argument separately, by providing a dict whose keys\n are names of arguments, and values are bools specifying value of allow_superset\n for this argument. In the latter case to provide value for return value, provide\n value for key \u201creturn\u201d. The default value is True.\n * ignore_primary_keys (`Union`\\[`bool`, `Mapping`\\[`str`, `bool`\\]\\]) \u2013 if True, the assert won\u2019t check whether table and schema\n have the same primary keys. Can be given either as a bool, and this value is then used for\n all tables, or for each argument separately, by providing a dict whose keys\n are names of arguments, and values are bools specifying value of ignore_primary_keys\n for this argument. The default value is True.\n * locals (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 when Schema class, which is used as a parameter to pw.Table is defined locally,\n you need to pass locals() as locals argument.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\nA | B\n1 | 6\n3 | 8\n5 | 2\n''')\nschema = pw.schema_from_types(A=int, B=int)\nresult_schema = pw.schema_from_types(A=int, B=int, C=int)\n@pw.table_transformer\ndef sum_columns(t: pw.Table[schema]) -> pw.Table[result_schema]:\n result = t.with_columns(C=pw.this.A + pw.this.B)\n return result\npw.debug.compute_and_print(sum_columns(t1), include_id=False)\n```\n::\nResult\n```\nA | B | C\n1 | 6 | 7\n3 | 8 | 11\n5 | 2 | 7\n```\n::\n::\npw.transformer(cls)\nDecorator that wraps the outer class when 
defining class transformers.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.transformer\nclass simple_transformer:\n class table(pw.ClassArg):\n arg = pw.input_attribute()\n @pw.output_attribute\n def ret(self) -> float:\n return self.arg + 1\nt1 = pw.debug.table_from_markdown('''\nage\n10\n9\n8\n7''')\nt2 = simple_transformer(table=t1.select(arg=t1.age)).table\npw.debug.compute_and_print(t1 + t2, include_id=False)\n```\n::\nResult\n```\nage | ret\n7 | 8\n8 | 9\n9 | 10\n10 | 11\n```\n::\n::\npw.udf(fun)\nCreate a Python UDF (universal data function) out of a callable.\nThe output type of the UDF is determined based on its type annotation.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\n@pw.udf\ndef concat(left: str, right: str) -> str:\n return left+right\nt1 = pw.debug.table_from_markdown('''\nage owner pet\n 10 Alice dog\n 9 Bob dog\n 8 Alice cat\n 7 Bob dog''')\nt2 = t1.select(col = concat(t1.owner, t1.pet))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncol\nAlicecat\nAlicedog\nBobdog\nBobdog\n```\n::\n::\npw.udf_async(fun=None, *, capacity=None, retry_strategy=None, cache_strategy=None)\nCreate a Python asynchronous UDF (universal data 
function) out of a callable.\nOutput column type deduced from type-annotations of a function.\nCan be applied to a regular or asynchronous function.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nimport asyncio\n@pw.udf_async\nasync def concat(left: str, right: str) -> str:\n await asyncio.sleep(0.1)\n return left+right\nt1 = pw.debug.table_from_markdown('''\nage owner pet\n 10 Alice dog\n 9 Bob dog\n 8 Alice cat\n 7 Bob dog''')\nt2 = t1.select(col = concat(t1.owner, t1.pet))\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncol\nAlicecat\nAlicedog\nBobdog\nBobdog\n```\n::\n::\npw.unwrap(col)\nChanges the type of the column from Optional\\[T\\] to T. 
If there is any None in the\ncolumn this operation will raise an exception.\nExample:\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nimport pathway as pw\nt1 = pw.debug.table_from_markdown('''\ncolA | colB\n1 | 5\n2 | 9\n3 | None\n4 | 15''')\nt1.schema\n```\n::\nResult\n```\n, 'colB': int | None}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t1, include_id=False)\n```\n::\nResult\n```\ncolA | colB\n1 | 5\n2 | 9\n3 |\n4 | 15\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be acccesed via `result`.\nExample:\n```python\nt2 = t1.filter(t1.colA < 3)\nt2.schema\n```\n::\nResult\n```\n, 'colB': int | None}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async 
transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be accessed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t2, include_id=False)\n```\n::\nResult\n```\ncolA | colB\n1 | 5\n2 | 9\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be accessed via `result`.\nExample:\n```python\nt3 = t2.select(colB = pw.unwrap(t2.colB))\nt3.schema\n```\n::\nResult\n```\n}>\n```\n::\n::\n"} -{"doc": "---\ntitle: Pathway API\nsidebar: 'API'\nnavigation: true\n---\n# Pathway API\nReference for all the Pathway classes and functions.\nSee Table API for the main Table class.\nclass pw.AsyncTransformer(input_table)\nAllows to perform async transformations on a table.\n`invoke()` will be called asynchronously for each row of an input_table.\nOutput table can be accessed via `result`.\nExample:\n```python\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\ncolB\n5\n9\n```\n::\n::\n"} -{"doc": "---\ntitle: pathway.io.http package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.http package\nclass pw.io.http.RetryPolicy(first_delay_ms, backoff_factor, jitter_ms)\nClass representing policy of delays or backoffs for the retries.\nFunctions\npw.io.http.read(url, *, schema=None, method='GET', payload=None, headers=None, response_mapper=None, format='json', delimiter=None, n_retries=0, retry_policy=, connect_timeout_ms=None, request_timeout_ms=None, allow_redirects=True, retry_codes=(429, 500, 502, 503, 504), autocommit_duration_ms=10000, debug_data=None, value_columns=None, primary_key=None, types=None, default_values=None)\nReads a table from an HTTP 
stream.\n* Parameters\n * url (`str`) \u2013 the full URL of streaming endpoint to fetch data from.\n * schema (`Optional`\\[`type`\\[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * method (`str`) \u2013 request method for streaming. It should be one of\n HTTP request methods.\n * payload (`Optional`\\[`Any`\\]) \u2013 data to be sent in the body of the request.\n * headers (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 request headers in the form of dict. Wildcards are allowed both in\n keys and in values.\n * response_mapper (`Optional`\\[`Callable`\\[\\[`str` | `bytes`\\], `bytes`\\]\\]) \u2013 in case a response needs to be processed, this method can be\n provided. It will be applied to each slice of a stream.\n * format (`str`) \u2013 format of the data, \u201cjson\u201d or \u201craw\u201d. In case of a \u201craw\u201d format,\n table with single \u201cdata\u201d column will be produced. For \u201cjson\u201d format, bytes\n encoded json is expected.\n * delimiter (`UnionType`\\[`str`, `bytes`, `None`\\]) \u2013 delimiter used to split stream into messages.\n * n_retries (`int`) \u2013 how many times to retry the failed request.\n * retry_policy (`RetryPolicy`) \u2013 policy of delays or backoffs for the retries.\n * connect_timeout_ms (`Optional`\\[`int`\\]) \u2013 connection timeout, specified in milliseconds. In case\n it\u2019s None, no restrictions on connection duration will be applied.\n * request_timeout_ms (`Optional`\\[`int`\\]) \u2013 request timeout, specified in milliseconds. In case\n it\u2019s None, no restrictions on request duration will be applied.\n * allow_redirects (`bool`) \u2013 whether to allow redirects.\n * retry_codes (`Optional`\\[`tuple`\\]) \u2013 HTTP status codes that trigger retries.\n * content_type \u2013 content type of the data to send. In case the chosen format is\n JSON, it will be defaulted to \u201capplication/json\u201d.\n * autocommit_duration_ms (`int`) \u2013 the maximum time between two commits. 
Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * debug_data \u2013 static data replacing original one when debug mode is active.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 columns to extract for a table. \\[will be deprecated soon\\]\n * primary_key (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 in case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated as uuid4. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 dictionary containing the mapping between the columns and the data types\n (`pw.Type`) of the values of those columns. This parameter is optional, and\n if not provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\nExamples:\nRaw format:\n```python\nimport os\nimport pathway as pw\ntable = pw.io.http.read(\n \"https://localhost:8000/stream\",\n method=\"GET\",\n headers={\"Authorization\": f\"Bearer {os.environ['BEARER_TOKEN']}\"},\n format=\"raw\",\n)\n```\nJSON with response mapper:\nInput can be adjusted using a mapping function that will be applied to each\nslice of a stream. 
The mapping function should return bytes.\n```python\ndef mapper(msg: bytes) -> bytes:\n result = json.loads(msg.decode())\n return json.dumps({\"key\": result[\"id\"], \"text\": result[\"data\"]}).encode()\nclass InputSchema(pw.Schema):\n key: int\n text: str\nt = pw.io.http.read(\n \"https://localhost:8000/stream\",\n method=\"GET\",\n headers={\"Authorization\": f\"Bearer {os.environ['BEARER_TOKEN']}\"},\n schema=InputSchema,\n response_mapper=mapper\n)\n```\npw.io.http.rest_connector(host, port, *, route='/', schema=None, autocommit_duration_ms=1500, keep_queries=None, delete_completed_queries=None)\nRuns a lightweight HTTP server and inputs a collection from the HTTP endpoint,\nconfigured by the parameters of this method.\nOn the output, the method provides a table and a callable, which needs to accept\nthe result table of the computation, which entries will be tracked and put into\nrespective request\u2019s responses.\n* Parameters\n * host (`str`) \u2013 TCP/IP host or a sequence of hosts for the created endpoint;\n * port (`int`) \u2013 port for the created endpoint;\n * route (`str`) \u2013 route which will be listened to by the web server;\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 schema of the resulting table;\n * autocommit_duration_ms \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph;\n * keep_queries (`Optional`\\[`bool`\\]) \u2013 whether to keep queries after processing; defaults to False. 
\\[deprecated\\]\n * delete_completed_queries (`Optional`\\[`bool`\\]) \u2013 whether to send a deletion entry after the query is processed.\n Allows to remove it from the system if it is stored by operators such as `join` or `groupby`;\n* Returns\n *table* \u2013 the table read;\n response_writer: a callable, where the result table should be provided.\npw.io.http.write(table, url, *, method='POST', format='json', request_payload_template=None, n_retries=0, retry_policy=, connect_timeout_ms=None, request_timeout_ms=None, content_type=None, headers=None, allow_redirects=True, retry_codes=(429, 500, 502, 503, 504))\nSends the stream of updates from the table to the specified HTTP API.\n* Parameters\n * table (`Table`) \u2013 table to be tracked.\n * method (`str`) \u2013 request method for streaming. It should be one of\n HTTP request methods.\n * url (`str`) \u2013 the full URL of the endpoint to push data into. Can contain wildcards.\n * format (`str`) \u2013 the payload format, one of {\u201cjson\u201d, \u201ccustom\u201d}. If \u201cjson\u201d is\n specified, the plain JSON will be formed and sent. Otherwise, the contents of the\n field request_payload_template will be used.\n * request_payload_template (`Optional`\\[`str`\\]) \u2013 the template to format and send in case \u201ccustom\u201d was\n specified in the format field. Can include wildcards.\n * n_retries (`int`) \u2013 how many times to retry the failed request.\n * retry_policy (`RetryPolicy`) \u2013 policy of delays or backoffs for the retries.\n * connect_timeout_ms (`Optional`\\[`int`\\]) \u2013 connection timeout, specified in milliseconds. In case\n it\u2019s None, no restrictions on connection duration will be applied.\n * request_timeout_ms (`Optional`\\[`int`\\]) \u2013 request timeout, specified in milliseconds. 
In case it\u2019s\n None, no restrictions on request duration will be applied.\n * allow_redirects (`bool`) \u2013 Whether to allow redirects.\n * retry_codes (`Optional`\\[`tuple`\\]) \u2013 HTTP status codes that trigger retries.\n * content_type (`Optional`\\[`str`\\]) \u2013 content type of the data to send. In case the chosen format is\n JSON, it will be defaulted to \u201capplication/json\u201d.\n * headers (`Optional`\\[`dict`\\[`str`, `str`\\]\\]) \u2013 request headers in the form of dict. Wildcards are allowed both, in\n keys and in values.\nWildcards:\nWildcards are the proposed way to customize the HTTP requests composed. The\nengine will replace all entries of `{table.}` with a value from the\ncolumn `` in the row sent. This wildcard resolving will happen in url,\nrequest payload template and headers.\nExamples:\nFor the sake of demonstration, let\u2019s try different ways to send the stream of changes\non a table `pets`, containing data about pets and their owners. The table contains\njust two columns: the pet and the owner\u2019s name.\n```python\nimport pathway as pw\npets = pw.debug.table_from_markdown(\"owner pet \\n Alice dog \\n Bob cat \\n Alice cat\")\n```\nConsider that there is a need to send the stream of changes on such table to the\nexternal API endpoint (let\u2019s pick some exemplary URL for the sake of demonstration).\nTo keep things simple, we can suppose that this API accepts flat JSON objects, which\nare sent in POST requests. Then, the communication can be done with a simple code\nsnippet:\n```python\npw.io.http.write(pets, \"http://www.example.com/api/event\")\n```\nNow let\u2019s do something more custom. Suppose that the API endpoint requires us to\ncommunicate via PUT method and to pass the values as CGI-parameters. 
In this case,\nwildcards are the way to go:\n```python\npw.io.http.write(\n pets,\n \"http://www.example.com/api/event?owner={table.owner}&pet={table.pet}\",\n method=\"PUT\"\n)\n```\nA custom payload can also be formed from the outside. What if the endpoint requires\nthe data in tskv format in request body?\nFirst of all, let\u2019s form a template for the message body:\n```python\nmessage_template_tokens = [\n \"owner={table.owner}\",\n \"pet={table.pet}\",\n \"time={table.time}\",\n \"diff={table.diff}\",\n]\nmessage_template = \"\\t\".join(message_template_tokens)\n```\nNow, we can use this template and the custom format, this way:\n```python\npw.io.http.write(\n pets,\n \"http://www.example.com/api/event\",\n method=\"POST\",\n format=\"custom\",\n request_payload_template=message_template\n)\n```\n"} -{"doc": "pathway.xpacks.spatial.h3 module\npw.xpacks.spatial.h3.h3_cover_geojson(geojson, h3_level)\nCovers geojson with H3 cells at the given level.\nBuilt-in h3.polyfill is not enough as it outputs H3 cells for which their centroids fall into geojson.\n"} -{"doc": "pathway.xpacks.spatial.geofencing module\nclass pw.xpacks.spatial.geofencing.GeofenceIndex(data, geojson_geometry, resolution_meters, instance=None)\nH3-based geospatial index allowing for efficient point location inside geofences.\nGeofences are mapped to the corresponding cells id at a fixed hierarchy level.\nSee https://h3geo.org/docs/highlights/indexing/ for the description of H3 index structure.\nParameters:\ndata (pw.Table): The table containing the data to be indexed.\ngeometry (pw.ColumnExpression): The column expression representing geofences as geojsons.\nresolution_meters (float): approximately determines how large covering H3 cells should be\ninstance (pw.ColumnExpression or None): The column expression representing the instance of the index\n> allowing for creating multiple indexes at once.\nCaveats:\nGeofences crossing antimeridian are not yet 
supported.\njoin_enclosing_geofences(query_table, *, lat, lon, instance=None)\nEfficiently joins (via left_join) rows of query table with rows of indexed geofences\nfor which the query point is inside a target geofence.\nParameters:\nquery_table (pw.Table): The table containing the queries.\nlat (pw.ColumnExpression): The column expression representing latitudes (degrees) in the query_table.\nlon (pw.ColumnExpression): The column expression representing longitudes (degrees) in the query_table.\ninstance (pw.ColumnExpression or None): The column expression representing the instance of the index\n> allowing for parallel queries to multiple indexes at once.\n* Returns\n *pw.JoinResult* \u2013 result of a join between query_table and indexed data table\nExample:\nCode\n```python\nimport pathway as pw\nqueries = pw.debug.table_from_markdown('''\n | lon | lat | sample_data\n1 | 11.0 | 1.0 | foo\n2 | 11.0 | 21.0 | bar\n3 | 20.0 | 1.0 | baz\n''')\n@pw.udf\ndef json_parse(col: str) -> pw.Json:\n return pw.Json.parse(col)\ndata = pw.debug.table_from_markdown('''\n | other_data | geometry\n111 | AAA | {\"coordinates\":[[[10.0,0.0],[12.0,0.0],[12.0,2.0],[10.0,2.0]]],\"type\":\"Polygon\"}\n222 | BBB | {\"coordinates\":[[[10.0,20.0],[12.0,20.0],[12.0,22.0],[10.0,22.0]]],\"type\":\"Polygon\"}\n''').with_columns(geometry=json_parse(pw.this.geometry))\nindex = pw.xpacks.spatial.geofencing.GeofenceIndex(\n data, data.geometry, resolution_meters=100_000,\n)\nres = index.join_enclosing_geofences(\n queries,\n lat=queries.lat,\n lon=queries.lon,\n).select(\n queries.sample_data,\n pw.right.other_data,\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\nsample_data | other_data\nbar | BBB\nbaz |\nfoo | AAA\n```\n::\n::\nFunctions\npw.xpacks.spatial.geofencing.is_in_geofence(lat, lon, geojson_geometry)\nTest if point is inside a geojson polygon\n"} -{"doc": "pathway.xpacks.spatial.index module\nclass pw.xpacks.spatial.index.H3Index(data, lat, lon, radius_meters, 
instance=None)\nH3-based geospatial index allowing for finding nearby lat lon points.\nLat lon points are mapped to the corresponding cell id at a fixed hierarchy level.\nThey are also mapped to the neighboring cells for fast closeby points retrieval.\nSee https://h3geo.org/docs/highlights/indexing/ for the description of H3 index structure.\nParameters:\ndata (pw.Table): The table containing the data to be indexed.\nlat (pw.ColumnExpression): The column expression representing latitudes (degrees) in the data.\nlon (pw.ColumnExpression): The column expression representing longitudes (degrees) in the data.\nradius_meters (float): maximum distance supported\ninstance (pw.ColumnExpression or None): The column expression representing the instance of the index\n> allowing for creating multiple indexes at once.\njoin_on_distance(query_table, query_lat, query_lon, distance_meters=None, instance=None)\nThis method efficiently joins (via left_join) rows of query table with rows of indexed data\nsuch that two points are within a certain distance.\nParameters:\nquery_table (pw.Table): The table containing the queries.\nlat (pw.ColumnExpression): The column expression representing latitudes (degrees) in the query_table.\nlon (pw.ColumnExpression): The column expression representing longitudes (degrees) in the query_table.\ninstance (pw.ColumnExpression or None): The column expression representing the instance of the index\n> allowing for parallel queries to multiple indexes at once.\n* Returns\n *pw.JoinResult* \u2013 result of a (distance-limited) join between query_table and indexed data table\nExample:\nCode\n```python\nimport pathway as pw\nqueries = pw.debug.table_from_markdown('''\n | instance | lat | lon | sample_data\n1 | 1 | 51.1000 | 17.0300 | foo\n2 | 1 | 51.1010 | 17.0310 | bar\n3 | 2 | 40.0000 | 179.999 | baz\n4 | 2 | 10.0000 | 10.0000 | zzz\n''')\ndata = pw.debug.table_from_markdown('''\n | instance | lat | lon | other_data\n111 | 1 | 51.0990 | 17.0290 | AAA\n112 
| 1 | 51.1000 | 17.0300 | BBB\n113 | 1 | 51.1010 | 17.0310 | CCC\n114 | 1 | 51.1020 | 17.0320 | DDD\n311 | 2 | 40.0000 | 179.999 | EEE\n313 | 2 | 40.0000 | -179.999 | FFF\n314 | 2 | 40.0000 | -179.980 | GGG\n412 | 2 | 51.1000 | 17.0300 | HHH\n''')\nindex = pw.xpacks.spatial.index.H3Index(\n data, data.lat, data.lon, instance=data.instance, radius_meters=200,\n)\nres = index.join_on_distance(\n queries,\n queries.lat,\n queries.lon,\n instance=queries.instance,\n).select(\n instance=queries.instance,\n sample_data=queries.sample_data,\n other_data=pw.right.other_data,\n dist_meters=pw.left.dist_meters.num.fill_na(-1).num.round(1),\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\ninstance | sample_data | other_data | dist_meters\n1 | bar | BBB | 131.5\n1 | bar | CCC | 0.0\n1 | bar | DDD | 131.5\n1 | foo | AAA | 131.5\n1 | foo | BBB | 0.0\n1 | foo | CCC | 131.5\n2 | baz | EEE | 0.0\n2 | baz | FFF | 170.8\n2 | zzz | | -1.0\n```\n::\n::\n"} -{"doc": "---\ntitle: Other API\nsidebar: 'API'\nnavigation: true\n---\n# Other API\nThe Other API section provides a complementary collection of resources covering various aspects of our Pathway Standard Library. This section is helpful for developers and data analysts seeking to extend their knowledge and proficiency with our diverse API offerings. 
In addition to Temporal Functions, it provides in-depth information about Machine Learning Models and some column functions.\n# Contents:\n* Temporal Functions\n* ML Classifiers\n"} -{"doc": "---\ntitle: pathway.stdlib.stateful package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.stdlib.stateful package\nFunctions\npw.stateful.deduplicate(table, *, col, instance=None, acceptor)\nDeduplicates rows in table on col column using acceptor function.\nIt keeps rows which were accepted by the acceptor function.\nAcceptor operates on two arguments - current value and the previous accepted value.\n* Parameters\n * table (*pw.Table\\[TSchema\\]*) \u2013 table to deduplicate\n * col (*pw.ColumnReference*) \u2013 column used for deduplication\n * acceptor (*Callable\\[\\[TDedupe, TDedupe\\], bool\\]*) \u2013 callback telling whether two values are different\n * instance (*pw.ColumnExpression, optional*) \u2013 Group column for which deduplication will be performed separately.\n Defaults to None.\n* Returns\n *pw.Table\\[TSchema\\]*\n"} -{"doc": "pathway.stdlib.stateful.deduplicate module\npw.stateful.deduplicate.deduplicate(table, *, col, instance=None, acceptor)\nDeduplicates rows in table on col column using acceptor function.\nIt keeps rows which were accepted by the acceptor function.\nAcceptor operates on two arguments - current value and the previous accepted value.\n* Parameters\n * table (*pw.Table\\[TSchema\\]*) \u2013 table to deduplicate\n * col (*pw.ColumnReference*) \u2013 column used for deduplication\n * acceptor (*Callable\\[\\[TDedupe, TDedupe\\], bool\\]*) \u2013 callback telling whether two values are different\n * instance (*pw.ColumnExpression, optional*) \u2013 Group column for which deduplication will be performed separately.\n Defaults to None.\n* Returns\n *pw.Table\\[TSchema\\]*\n"} -{"doc": "Subpackages\n* pathway.stdlib.graphs package\n * `Edge`\n * `Graph`\n * `Vertex`\n * `WeightedGraph`\n * Subpackages\n * 
pathway.stdlib.graphs.bellman_ford package\n * `DistFromSource`\n * `Vertex`\n * Submodules\n * pathway.stdlib.graphs.bellman_ford.impl module\n * pathway.stdlib.graphs.louvain_communities package\n * Submodules\n * pathway.stdlib.graphs.louvain_communities.impl module\n * pathway.stdlib.graphs.pagerank package\n * `Result`\n * Submodules\n * pathway.stdlib.graphs.pagerank.impl module\n * Submodules\n * pathway.stdlib.graphs.common module\n * `Cluster`\n * `Clustering`\n * `Edge`\n * `Vertex`\n * `Weight`\n * pathway.stdlib.graphs.graph module\n * `Graph`\n * `WeightedGraph`\n* pathway.stdlib.indexing package\n * `SortedIndex`\n * `SortedIndex.clear()`\n * `SortedIndex.copy()`\n * `SortedIndex.fromkeys()`\n * `SortedIndex.get()`\n * `SortedIndex.items()`\n * `SortedIndex.keys()`\n * `SortedIndex.pop()`\n * `SortedIndex.popitem()`\n * `SortedIndex.setdefault()`\n * `SortedIndex.update()`\n * `SortedIndex.values()`\n * `retrieve_prev_next_values()`\n * Submodules\n * pathway.stdlib.indexing.sorting module\n * `Aggregate`\n * `BinsearchOracle`\n * `Candidate`\n * `ComparisonRet`\n * `Hash`\n * `Instance`\n * `Key`\n * `LeftRight`\n * `Node`\n * `Parent`\n * `PrefixSumOracle`\n * `PrevNext`\n * `SortedIndex`\n * `SortedIndex.clear()`\n * `SortedIndex.copy()`\n * `SortedIndex.fromkeys()`\n * `SortedIndex.get()`\n * `SortedIndex.items()`\n * `SortedIndex.keys()`\n * `SortedIndex.pop()`\n * `SortedIndex.popitem()`\n * `SortedIndex.setdefault()`\n * `SortedIndex.update()`\n * `SortedIndex.values()`\n * `Value`\n * `retrieve_prev_next_values()`\n* pathway.stdlib.ml package\n * Subpackages\n * pathway.stdlib.ml.classifiers package\n * `knn_lsh_classifier_train()`\n * `knn_lsh_classify()`\n * `knn_lsh_euclidean_classifier_train()`\n * `knn_lsh_generic_classifier_train()`\n * `knn_lsh_train()`\n * Submodules\n * pathway.stdlib.ml.classifiers.test_lsh module\n * pathway.stdlib.ml.datasets package\n * Subpackages\n * pathway.stdlib.ml.smart_table_ops package\n * `Edge`\n * 
`Feature`\n * `FuzzyJoinFeatureGeneration`\n * `FuzzyJoinNormalization`\n * `JoinResult`\n * `Node`\n * Submodules\n * pathway.stdlib.ml.index module\n * `KNNIndex`\n * `KNNIndex.get_nearest_items()`\n * `KNNIndex.get_nearest_items_asof_now()`\n * pathway.stdlib.ml.utils module\n* pathway.stdlib.ordered package\n * `diff()`\n * Submodules\n * pathway.stdlib.ordered.diff module\n * `diff()`\n* pathway.stdlib.stateful package\n * `deduplicate()`\n * Submodules\n * pathway.stdlib.stateful.deduplicate module\n * `deduplicate()`\n* pathway.stdlib.statistical package\n * `interpolate()`\n* pathway.stdlib.temporal package\n * `AsofJoinResult`\n * `AsofNowJoinResult`\n * `AsofNowJoinResult.select()`\n * `CommonBehavior`\n * `Direction`\n * `IntervalJoinResult`\n * `IntervalJoinResult.select()`\n * `Window`\n * `WindowJoinResult`\n * `WindowJoinResult.select()`\n * `asof_join()`\n * `asof_join_left()`\n * `asof_join_outer()`\n * `asof_join_right()`\n * `asof_now_join()`\n * `asof_now_join_inner()`\n * `asof_now_join_left()`\n * `common_behavior()`\n * `interval()`\n * `interval_join()`\n * `interval_join_inner()`\n * `interval_join_left()`\n * `interval_join_outer()`\n * `interval_join_right()`\n * `intervals_over()`\n * `session()`\n * `sliding()`\n * `tumbling()`\n * `window_join()`\n * `window_join_inner()`\n * `window_join_left()`\n * `window_join_outer()`\n * `window_join_right()`\n * `windowby()`\n * Submodules\n * pathway.stdlib.temporal.temporal_behavior module\n * `Behavior`\n * `CommonBehavior`\n * `ExactlyOnceBehavior`\n * `common_behavior()`\n * `exactly_once_behavior()`\n * pathway.stdlib.temporal.utils module\n * `check_joint_types()`\n* pathway.stdlib.utils package\n * Submodules\n * pathway.stdlib.utils.async_transformer module\n * `AsyncTransformer`\n * `AsyncTransformer.close()`\n * `AsyncTransformer.invoke()`\n * `AsyncTransformer.open()`\n * `AsyncTransformer.result`\n * `AsyncTransformer.with_options()`\n * pathway.stdlib.utils.bucketing module\n * 
pathway.stdlib.utils.col module\n * `apply_all_rows()`\n * `flatten_column()`\n * `groupby_reduce_majority()`\n * `multiapply_all_rows()`\n * `unpack_col()`\n * pathway.stdlib.utils.filtering module\n * pathway.stdlib.utils.pandas_transformer module\n * `pandas_transformer()`\n"} -{"doc": "---\ntitle: pathway.debug package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.debug package\nFunctions\npw.debug.compute_and_print(table, *, include_id=True, short_pointers=True, n_rows=None)\nA function running the computations and printing the table.\n:type table: `Table`\n:param table: a table to be computed and printed\n:type include_id: \n:param include_id: whether to show ids of rows\n:type short_pointers: \n:param short_pointers: whether to shorten printed ids\n:type n_rows: `Optional`\\[`int`\\]\n:param n_rows: number of rows to print, if None whole table will be printed\npw.debug.compute_and_print_update_stream(table, *, include_id=True, short_pointers=True, n_rows=None)\nA function running the computations and printing the update stream of the table.\n:type table: `Table`\n:param table: a table for which the update stream is to be computed and printed\n:type include_id: \n:param include_id: whether to show ids of rows\n:type short_pointers: \n:param short_pointers: whether to shorten printed ids\n:type n_rows: `Optional`\\[`int`\\]\n:param n_rows: number of rows to print, if None whole update stream will be printed\npw.debug.table_from_markdown(table_def, id_from=None, unsafe_trusted_ids=False, schema=None)\nA function for creating a table from its definition in markdown. If it contains a special\ncolumn `__time__`, rows will be split into batches with timestamps from the column.\nA special column `__diff__` can be used to set an event type - with `1` treated\nas inserting the row and `-1` as removing it.\npw.debug.table_from_pandas(df, id_from=None, unsafe_trusted_ids=False, schema=None)\nA function for creating a table from a pandas DataFrame. 
If it contains a special\ncolumn `__time__`, rows will be split into batches with timestamps from the column.\nA special column `__diff__` can be used to set an event type - with `1` treated\nas inserting the row and `-1` as removing it.\npw.debug.table_from_parquet(path, id_from=None, unsafe_trusted_ids=False)\nReads a Parquet file into a pandas DataFrame and then converts that into a Pathway table.\npw.debug.table_to_parquet(table, filename)\nConverts a Pathway Table into a pandas DataFrame and then writes it to Parquet\n"} -{"doc": "---\ntitle: pathway.io.elasticsearch package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.elasticsearch package\nFunctions\npw.io.elasticsearch.write(table, host, auth, index_name)\nWrite a table to a given index in ElasticSearch.\n* Parameters\n * table (`Table`) \u2013 the table to output.\n * host (`str`) \u2013 the host and port, on which Elasticsearch server works.\n * auth (`ElasticSearchAuth`) \u2013 credentials for Elasticsearch authorization.\n * index_name (`str`) \u2013 name of the index, which gets the docs.\n* Returns\n None\nExample:\nConsider there is an instance of Elasticsearch, running locally on a port 9200.\nThere we have an index \u201canimals\u201d, containing an information about pets and their\nowners.\nFor the sake of simplicity we will also consider that the cluster has a simple\nusername-password authentication having both username and password equal to \u201cadmin\u201d.\nNow suppose we want to send a Pathway table pets to this local instance of\nElasticsearch.\n```python\nimport pathway as pw\npets = pw.debug.table_from_markdown('''\nage | owner | pet\n10 | Alice | dog\n9 | Bob | cat\n8 | Alice | cat\n''')\n```\nIt can be done as follows:\n```python\npw.io.elasticsearch.write(\n table=pets,\n host=\"http://localhost:9200\",\n auth=pw.io.elasticsearch.ElasticSearchAuth.basic(\"admin\", \"admin\"),\n index_name=\"animals\",\n)\n```\nAll the updates of table \u201cpets\u201d will be indexed to 
\u201canimals\u201d as well.\n"} -{"doc": "Read and write\n* pathway.io.csv package\n * `read()`\n * `write()`\n* pathway.io.fs package\n * `read()`\n * `write()`\n* pathway.io.http package\n * `RetryPolicy`\n * `read()`\n * `rest_connector()`\n * `write()`\n* pathway.io.jsonlines package\n * `read()`\n * `write()`\n* pathway.io.kafka package\n * `read()`\n * `read_from_upstash()`\n * `simple_read()`\n * `write()`\n* pathway.io.redpanda package\n * `read()`\n * `write()`\n"} -{"doc": "Read only\n* pathway.io.debezium package\n * `read()`\n* pathway.io.plaintext package\n * `read()`\n* pathway.io.python package\n * `ConnectorSubject`\n * `ConnectorSubject.close()`\n * `ConnectorSubject.commit()`\n * `ConnectorSubject.next_bytes()`\n * `ConnectorSubject.next_json()`\n * `ConnectorSubject.next_str()`\n * `ConnectorSubject.on_stop()`\n * `ConnectorSubject.start()`\n * `read()`\n* pathway.io.s3 package\n * `AwsS3Settings`\n * `AwsS3Settings.new_from_path()`\n * `DigitalOceanS3Settings`\n * `WasabiS3Settings`\n * `read()`\n * `read_from_digital_ocean()`\n * `read_from_wasabi()`\n* pathway.io.minio package\n * `MinIOSettings`\n * `read()`\n* pathway.io.gdrive package\n * `read()`\n* pathway.io.sqlite package\n * `read()`\n"} -{"doc": "Write only\n* pathway.io.elasticsearch package\n * `write()`\n* pathway.io.logstash package\n * `write()`\n* pathway.io.null package\n * `write()`\n* pathway.io.postgres package\n * `write()`\n * `write_snapshot()`\nclass pw.io.CsvParserSettings(delimiter=',', quote='\"', escape=None, enable_double_quote_escapes=True, enable_quoting=True, comment_character=None)\nClass representing settings for the CSV parser.\n* Parameters\n * delimiter \u2013 Field delimiter to use when parsing CSV.\n * quote \u2013 Quote character to use when parsing CSV.\n * escape \u2013 What character to use for escaping fields in CSV.\n * enable_double_quote_escapes \u2013 Enable escapes of double quotes.\n * enable_quoting \u2013 Enable quoting for the fields.\n * 
comment_character \u2013 If specified, the lines starting with the comment character will be treated as comments and therefore, will be ignored by parser\nclass pw.io.OnChangeCallback(*args, kwargs)\nThe callback to be called on every change in the table. It is required to be\ncallable and to accept four parameters: the key, the row changed, the time of the\nchange in milliseconds and the flag stating if the change had been an addition\nof the row.\nclass pw.io.OnFinishCallback(*args, kwargs)\nThe callback function to be called when the stream of changes ends. It will be called on each engine worker separately.\nFunctions\npw.io.subscribe(table, on_change, on_end=>)\nCalls a callback function on_change on every change happening in table.\n* Parameters\n * table \u2013 the table to subscribe.\n * on_change (`OnChangeCallback`) \u2013 the callback to be called on every change in the table. The\n function is required to accept three parameters: the row changed, the time\n of the change in microseconds and the flag stating if the change had been an\n addition of the row. 
These parameters of the callback are expected to have\n names row, time and is_addition respectively.\n * on_end (`OnFinishCallback`) \u2013 the callback to be called when the stream of changes ends.\n It will be called on each engine worker separately.\n* Returns\n None\nExample:\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown('''\n | pet | owner | age | __time__ | __diff__\n 1 | dog | Alice | 10 | 0 | 1\n 2 | cat | Alice | 8 | 1 | 1\n 3 | dog | Bob | 7 | 2 | 1\n 2 | cat | Alice | 8 | 3 | -1\n''')\ndef on_change(key: pw.Pointer, row: dict, time: int, is_addition: bool):\n print(f\"{row}, {time}, {is_addition}\")\ndef on_end():\n print(\"End of stream.\")\npw.io.subscribe(table, on_change, on_end)\npw.run(monitoring_level=pw.MonitoringLevel.NONE)\n```\n::\nResult\n```\n{'pet': 'dog', 'owner': 'Alice', 'age': 10}, 0, True\n{'pet': 'cat', 'owner': 'Alice', 'age': 8}, 2, True\n{'pet': 'dog', 'owner': 'Bob', 'age': 7}, 4, True\n{'pet': 'cat', 'owner': 'Alice', 'age': 8}, 6, False\nEnd of stream.\n```\n::\n::\n"} -{"doc": "Configuration classes\nclass pw.persistence.Backend(engine_data_storage, fs_path=None)\nThe settings of a backend, which is used to persist the computation state. There\nare two kinds of data backends: metadata backend and snapshot backend. 
Both are\nconfigurable via this class.\nclassmethod filesystem(path)\nConfigure the filesystem backend.\n* Parameters\n path (`str` | `PathLike`\\[`str`\\]) \u2013 the path to the root directory in the file system, which will be used to store the persisted data.\n* Returns\n Class instance denoting the filesystem storage backend with root directory at `path`.\nclassmethod s3(root_path, bucket_settings)\nConfigure the S3 backend.\n* Parameters\n * root_path (`str`) \u2013 path to the root in the S3 storage, which will be used to store persisted data;\n * bucket_settings (`AwsS3Settings`) \u2013 the settings for S3 bucket connection in the same format as they are used by S3 connectors.\n* Returns\n Class instance denoting the S3 storage backend with root directory as\n `root_path` and connection settings given by `bucket_settings`.\nclass pw.persistence.Config(*, snapshot_interval_ms=0, metadata_storage, snapshot_storage, snapshot_access, replay_mode, continue_after_replay)\nConfigure the data persistence. An instance of this class should be passed as a\nparameter to pw.run in case persistence is enabled.\nPlease note that if you\u2019d like to use the same backend for both metadata and\nsnapshot storages, you can use the convenience method `simple_config`.\n* Parameters\n * metadata_storage (`Backend`) \u2013 metadata backend configuration;\n * snapshot_storage (`Backend`) \u2013 snapshots backend configuration;\n * snapshot_interval_ms (`int`) \u2013 the desired duration between snapshot updates in milliseconds;\nclassmethod simple_config(backend, snapshot_interval_ms=0, snapshot_access=, replay_mode=, continue_after_replay=True)\nConstruct config from a single instance of the `Backend` class, using this backend to persist metadata and snapshot.\n* Parameters\n * backend (`Backend`) \u2013 storage backend settings;\n * snapshot_interval_ms \u2013 the desired freshness of the persisted snapshot in milliseconds. 
The greater the value is, the more the amount of time that the snapshot may fall behind, and the less computational resources are required.\n* Returns\n Persistence config.\n"} -{"doc": "---\ntitle: SQL API\ndescription: 'Using SQL commands with Pathway using pw.sql function.'\nnotebook_export_path: documentation/sql_api.ipynb\n---\n\u00a0\u00a0\n# Using SQL with Pathway\nPerform SQL commands using Pathway's `pw.sql` function.\n---\nPathway provides a very simple way to use SQL commands directly in your Pathway application: the use of `pw.sql`.\nPathway is significantly different from a usual SQL database, and not all SQL operations are available in Pathway.\nIn the following, we present the SQL operations which are compatible with Pathway and how to use `pw.sql`.\nThis article is a summary of dos and don'ts on how to use Pathway to execute SQL queries, this is not an introduction to SQL.\n"} -{"doc": "Usage\nYou can very easily execute a SQL command by doing the following:\n```python\npw.sql(query, tab=t)\n```\nThis will execute the SQL command `query` where the Pathway table `t` (Python local variable) can be referred to as `tab` (SQL table name) inside `query`.\nMore generally, you can pass an arbitrary number of tables associations `name, table` using `kwargs`: `pw.sql(query, tab1=t1, tab2=t2,.., tabn=tn)`.\n"} -{"doc": "Example\n```python\nimport pathway as pw\nt = pw.debug.table_from_markdown(\n \"\"\"\n | a | b\n 1 | 1 | 2\n 2 | 4 | 3\n 3 | 4 | 7\n \"\"\"\n)\nret = pw.sql(\"SELECT * FROM tab WHERE a2\", tab=t)\npw.debug.compute_and_print(result_where)\n```\n [2023-10-19T14:44:28]:INFO:Preparing Pathway computation\n | a | b\n ^Z3QWT29... | 4 | 3\n ^3CZ78B4... 
| 4 | 7\n"} -{"doc": "`GROUP BY`\nYou can use `GROUP BY` to group rows with the same value for a given column, and to use an aggregate function over the grouped rows.\n```python\nresult_groupby = pw.sql(\"SELECT a, SUM(b) FROM tab GROUP BY a\", tab=t)\npw.debug.compute_and_print(result_groupby)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | _col_1\n ^YYY4HAB... | 1 | 2\n ^3HN31E1... | 4 | 10\n\u26a0\ufe0f `GROUP BY` and `JOIN` should not be used together in a single `SELECT`.\n#### Aggregation functions\nWith `GROUP BY`, you can use the following aggregation functions:\n- `AVG`\n- `COUNT`\n- `MAX`\n- `MIN`\n- `SUM`\n\u26a0\ufe0f Pathway reducers (`pw.count`, `pw.sum`, etc.) aggregate over `None` values, while traditional SQL aggregate functions skip `NULL` values: be careful to remove all the undefined values before using an aggregate function.\n"} -{"doc": "`AS` (alias)\nPathway supports both notations: `old_name as new_name` and `old_name new_name`.\n```python\nresult_alias = pw.sql(\"SELECT b, a AS c FROM tab\", tab=t)\npw.debug.compute_and_print(result_alias)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | b | c\n ^YYY4HAB... | 2 | 1\n ^Z3QWT29... | 3 | 4\n ^3CZ78B4... | 7 | 4\n```python\nresult_alias = pw.sql(\"SELECT b, a c FROM tab\", tab=t)\npw.debug.compute_and_print(result_alias)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | b | c\n ^YYY4HAB... | 2 | 1\n ^Z3QWT29... | 3 | 4\n ^3CZ78B4... | 7 | 4\n"} -{"doc": "`UNION`\nPathway provides the standard `UNION` SQL operator.\nNote that `UNION` requires matching column names.\n```python\nt_union = pw.debug.table_from_markdown(\n \"\"\"\n | a | b\n 4 | 9 | 3\n 5 | 2 | 7\n \"\"\"\n)\nresult_union = pw.sql(\"SELECT * FROM tab UNION SELECT * FROM tab2\", tab=t, tab2=t_union)\npw.debug.compute_and_print(result_union)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b\n ^KYCVNKF... | 1 | 2\n ^856GZ16... | 2 | 7\n ^H3J0A0V... 
| 4 | 3\n ^GX1QVN0... | 4 | 7\n ^7HC68KR... | 9 | 3\n"} -{"doc": "`INTERSECT`\nPathway provides the standard `INTERSECT` SQL operator.\nNote that `INTERSECT` requires matching column names.\n```python\nt_inter = pw.debug.table_from_markdown(\n \"\"\"\n | a | b\n 4 | 9 | 3\n 5 | 2 | 7\n 6 | 1 | 2\n \"\"\"\n)\nresult_inter = pw.sql(\n \"SELECT * FROM tab INTERSECT SELECT * FROM tab2\", tab=t, tab2=t_inter\n)\npw.debug.compute_and_print(result_inter)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b\n ^KYCVNKF... | 1 | 2\n\u26a0\ufe0f `INTERSECT` does not support `INTERSECT ALL` (coming soon).\n"} -{"doc": "`JOIN`\nPathway provides different join operations: `INNER JOIN`, `LEFT JOIN` (or `LEFT OUTER JOIN`), `RIGHT JOIN` (or `RIGHT OUTER JOIN`), `SELF JOIN`, and `CROSS JOIN`.\n```python\nt_join = pw.debug.table_from_markdown(\n \"\"\"\n | b | c\n 4 | 4 | 9\n 5 | 3 | 4\n 6 | 7 | 5\n \"\"\"\n)\nresult_join = pw.sql(\n \"SELECT * FROM left_table INNER JOIN right_table ON left_table.b==right_table.b\",\n left_table=t,\n right_table=t_join,\n)\npw.debug.compute_and_print(result_join)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b | c\n ^J1AVR2S... | 4 | 3 | 4\n ^8V184A9... | 4 | 7 | 5\n\u26a0\ufe0f `GROUP BY` and `JOIN` should not be used together in a single `SELECT`.\n\u26a0\ufe0f `NATURAL JOIN` and `FULL JOIN` are not supported (coming soon).\n"} -{"doc": "`WITH`\nIn addition to being placed inside a `WHERE` clause, subqueries can also be performed using the `WITH` keyword:\n```python\nresult_with = pw.sql(\n \"WITH group_table (a, sumB) AS (SELECT a, SUM(b) FROM tab GROUP BY a) SELECT sumB FROM group_table\",\n tab=t,\n)\npw.debug.compute_and_print(result_with)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | sumB\n ^YYY4HAB... | 2\n ^3HN31E1... 
| 10\n"} -{"doc": "Boolean and Arithmetic Expressions\nWith the `SELECT ...` and `WHERE ...` clauses, you can use the following operators:\n- boolean operators: `AND`, `OR`, `NOT`\n- arithmetic operators: `+`, `-`, `*`, `/`, `DIV`, `MOD`, `==`, `!=`, `<`, `>`, `<=`, `>=`, `<>`\n- NULL\n```python\nresult_bool = pw.sql(\"SELECT a,b FROM tab WHERE b-a>0 AND a>3\", tab=t)\npw.debug.compute_and_print(result_bool)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b\n ^3CZ78B4... | 4 | 7\nBoth `!=` and `<>` can be used to check non-equality.\n```python\nresult_neq = pw.sql(\"SELECT a,b FROM tab WHERE a != 4 OR b <> 3\", tab=t)\npw.debug.compute_and_print(result_neq)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b\n ^YYY4HAB... | 1 | 2\n ^3CZ78B4... | 4 | 7\n`NULL` can be used to filter out rows with missing values:\n```python\nt_null = pw.debug.table_from_markdown(\n \"\"\"\n | a | b\n 1 | 1 | 2\n 2 | 4 |\n 3 | 4 | 7\n \"\"\"\n)\nresult_null = pw.sql(\"SELECT a, b FROM tab WHERE b IS NOT NULL \", tab=t_null)\npw.debug.compute_and_print(result_null)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | a | b\n ^YYY4HAB... | 1 | 2\n ^3CZ78B4... | 4 | 7\nYou can use single row result subqueries in the `WHERE` clause to filter a table based on the subquery results:\n```python\nt_subqueries = pw.debug.table_from_markdown(\n \"\"\"\n | employee | salary\n 1 | 1 | 10\n 2 | 2 | 11\n 3 | 3 | 12\n \"\"\"\n)\nresult_subqueries = pw.sql(\n \"SELECT employee, salary FROM t WHERE salary >= (SELECT AVG(salary) FROM t)\",\n t=t_subqueries,\n)\npw.debug.compute_and_print(result_subqueries)\n```\n [2023-10-19T14:44:29]:INFO:Preparing Pathway computation\n | employee | salary\n ^Z3QWT29... | 2 | 11\n ^3CZ78B4... 
| 3 | 12\n\u26a0\ufe0f For now, only single row result subqueries are supported.\nCorrelated subqueries and the associated operations `ANY`, `NONE`, and `EVERY` (or its alias `ALL`) are currently not supported.\n"} -{"doc": "Subpackages\n* pathway.xpacks.spatial package\n * Submodules\n * pathway.xpacks.spatial.geofencing module\n * `GeofenceIndex`\n * `GeofenceIndex.join_enclosing_geofences()`\n * `is_in_geofence()`\n * pathway.xpacks.spatial.h3 module\n * `h3_cover_geojson()`\n * pathway.xpacks.spatial.index module\n * `H3Index`\n * `H3Index.join_on_distance()`\n"} -{"doc": "---\ntitle: pathway.io.s3_csv package\nsidebar: 'API'\nnavigation: false\n---\n# pathway.io.s3_csv package\nFunctions\npw.io.s3_csv.read(path, *, aws_s3_settings=None, schema=None, csv_settings=None, mode='streaming', autocommit_duration_ms=1500, persistent_id=None, debug_data=None, value_columns=None, id_columns=None, types=None, default_values=None, kwargs)\nReads a table from one or several objects in Amazon S3 bucket.\nIn case the prefix of S3 path is specified, and there are several objects lying\nunder this prefix, their order is determined according to their modification times:\nthe smaller the modification time is, the earlier the file will be passed to the\nengine.\n* Parameters\n * path (`str`) \u2013 Path to an object or to a folder of objects in Amazon S3 bucket.\n * aws_s3_settings (`Optional`\\[`AwsS3Settings`\\]) \u2013 Connection parameters for the S3 account and the bucket.\n * schema (`Optional`\\`type`\\[[`Schema`\\]\\]) \u2013 Schema of the resulting table.\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 The settings for the CSV parser.\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new input\n files in the bucket, which fall under the path prefix. Set it to \u201cstatic\u201d, it will only\n consider the available data and ingest all of it in one commit. 
Default value is\n \u201cstreaming\u201d.\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n * persistent_id (`Optional`\\[`str`\\]) \u2013 (unstable) An identifier, under which the state of the table\n will be persisted or `None`, if there is no need to persist the state of this table.\n When a program restarts, it restores the state for all input tables according to what\n was saved for their `persistent_id`. This way it\u2019s possible to configure the start of\n computations from the moment they were terminated last time.\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n * value_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 Names of the columns to be extracted from the files. \\[will be deprecated soon\\]\n * id_columns (`Optional`\\[`list`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly. \\[will be deprecated soon\\]\n * types (`Optional`\\[`dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data\n types (`pw.Type`) of the values of those columns. This parameter is optional, and if not\n provided the default type is `pw.Type.ANY`. \\[will be deprecated soon\\]\n * default_values (`Optional`\\[`dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. The default value of the column must be specified explicitly,\n otherwise there will be no default value. \\[will be deprecated soon\\]\n* Returns\n *Table* \u2013 The table read.\nExample:\nLet\u2019s consider an object store, which is hosted in Amazon S3. 
The store contains\ndatasets in the respective bucket and is located in the region eu-west-3. The goal\nis to read the dataset, located under the path `animals/` in this bucket.\nThen, the code may look as follows:\n```python\nimport os\nimport pathway as pw\nclass InputSchema(pw.Schema):\n owner: str\n pet: str\nt = pw.io.s3_csv.read(\n \"animals/\",\n aws_s3_settings=pw.io.s3_csv.AwsS3Settings(\n bucket_name=\"datasets\",\n region=\"eu-west-3\",\n access_key=os.environ[\"S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"S3_SECRET_ACCESS_KEY\"],\n ),\n schema=InputSchema,\n)\n```\nAlternatively, there might be a need to read the data from S3 storage, which is\nhosted in a different cloud and, therefore, requires to specify a custom endpoint.\nIt can be done with the usage of an extra parameter endpoint of AwsS3Settings\nobject. An example for the OVH-hosted bucket would then look as follows:\n```python\nimport os\nimport pathway as pw\nt = pw.io.s3_csv.read(\n \"animals/\",\n aws_s3_settings=pw.io.s3_csv.AwsS3Settings(\n bucket_name=\"datasets\",\n region=\"rbx\",\n endpoint=\"s3.rbx.io.cloud.ovh.net\",\n access_key=os.environ[\"OVH_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"OVH_S3_SECRET_ACCESS_KEY\"],\n ),\n schema=InputSchema,\n)\n```\nIn case you are dealing with custom S3 buckets, there are two ways\nto work with paths in requests. The default and the one used by AWS S3 is a\nvirtually hosted-style. However, some installations of S3 in, for example, min.io\ndo require to use of path-style requests. 
If this is the case, you can use the\nparameter with_path_style of AwsS3Settings.\nThen, the code may look as follows:\n```python\nimport os\nimport pathway as pw\nt = pw.io.s3_csv.read(\n \"animals/\",\n aws_s3_settings=pw.io.s3_csv.AwsS3Settings(\n bucket_name=\"datasets\",\n endpoint=\"avv749.stackhero-network.com\",\n access_key=os.environ[\"MINIO_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"MINIO_S3_SECRET_ACCESS_KEY\"],\n with_path_style=True,\n ),\n schema=InputSchema,\n)\n```\n"} diff --git a/examples/pipelines/contextful_geometric/docker-compose.yml b/examples/pipelines/contextful_geometric/docker-compose.yml deleted file mode 100644 index 20a3924..0000000 --- a/examples/pipelines/contextful_geometric/docker-compose.yml +++ /dev/null @@ -1,21 +0,0 @@ -version: "3.8" -services: - pathway: - build: - context: . - ports: - - "8080:8080" - environment: - OPENAI_API_KEY: - PATHWAY_PERSISTENT_STORAGE: - volumes: - - "./data:/app/data" - streamlit_ui: - depends_on: - - pathway - build: - context: ./ui - ports: - - "8501:8501" - environment: - PATHWAY_REST_CONNECTOR_HOST: "pathway" diff --git a/examples/pipelines/contextful_geometric/ui/Dockerfile b/examples/pipelines/contextful_geometric/ui/Dockerfile deleted file mode 100644 index 78e2121..0000000 --- a/examples/pipelines/contextful_geometric/ui/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM python:3.11 - -WORKDIR /app - -RUN pip install streamlit python-dotenv - -COPY . . 
- -EXPOSE 8501 - -CMD ["streamlit", "run", "server.py", "--server.port", "8501", "--server.address", "0.0.0.0"] diff --git a/examples/pipelines/contextful_geometric/ui/server.py b/examples/pipelines/contextful_geometric/ui/server.py deleted file mode 100644 index a8160fb..0000000 --- a/examples/pipelines/contextful_geometric/ui/server.py +++ /dev/null @@ -1,52 +0,0 @@ -import os - -import requests -import streamlit as st -from dotenv import load_dotenv - -with st.sidebar: - st.markdown( - "[View the source code on GitHub](https://github.com/pathwaycom/llm-app)" - ) - -# Load environment variables -load_dotenv() -api_host = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "127.0.0.1") -api_port = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", 8080)) - - -# Streamlit UI elements -st.title("LLM App") - - -# Initialize chat history -if "messages" not in st.session_state: - st.session_state.messages = [] - -# Display chat messages from history on app rerun -for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - - -# React to user input -if prompt := st.chat_input("How can I help you today?"): - # Display user message in chat message container - with st.chat_message("user"): - st.markdown(prompt) - - # Add user message to chat history - st.session_state.messages.append({"role": "user", "content": prompt}) - - url = f"http://{api_host}:{api_port}/" - data = {"query": prompt, "user": "user"} - - response = requests.post(url, json=data) - - if response.status_code == 200: - response = response.json() - with st.chat_message("assistant"): - st.markdown(response) - st.session_state.messages.append({"role": "assistant", "content": response}) - else: - st.error(f"Failed to send data. 
Status code: {response.status_code}") diff --git a/examples/pipelines/contextful_parsing/Dockerfile b/examples/pipelines/contextful_parsing/Dockerfile deleted file mode 100644 index 11936a0..0000000 --- a/examples/pipelines/contextful_parsing/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM pathwaycom/pathway:latest -WORKDIR /app - -RUN apt-get update \ - && apt-get install -y python3-opencv \ - && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* - -COPY . . -EXPOSE 8080 - -CMD ["python", "app.py"] diff --git a/examples/pipelines/contextful_parsing/README.md b/examples/pipelines/contextful_parsing/README.md deleted file mode 100644 index 11df94c..0000000 --- a/examples/pipelines/contextful_parsing/README.md +++ /dev/null @@ -1,72 +0,0 @@ -

- - GCP Logo Deploy with GCP - | - - Render Logo Deploy with Render - -

- -# RAG pipeline with Pathway + Unstructured: getting answers based on PDFs - -This example implements a RAG pipeline, similarly to [contextful pipeline](). It uses, however, [Unstructured](https://unstructured.io/) library for parsing documents, e.g. PDFs, which are then split into smaller chunks. - -## How to run the project - -### Setup environment: -Set your env variables in the .env file placed in this directory. - -```bash -OPENAI_API_KEY=sk-... -PATHWAY_DATA_DIR= # If unset, defaults to ./data/. If running with Docker, when you change this variable you may need to change the volume mount. -PATHWAY_PERSISTENT_STORAGE= # Set this variable if you want to use caching -``` - -### Run with Docker - -To run jointly the Alert pipeline and a simple UI execute: - -```bash -docker compose up --build -``` - -Then, the UI will run at http://0.0.0.0:8501 by default. You can access it by following this URL in your web browser. - -The `docker-compose.yml` file declares a [volume bind mount](https://docs.docker.com/reference/cli/docker/container/run/#volume) that makes changes to files under `data/` made on your host computer visible inside the docker container. The files in `data/live` are indexed by the pipeline - you can paste new files there and they will impact the computations. - -### Run manually - -Alternatively, you can run each service separately. - -Make sure you have installed poetry dependencies with `--extras unstructured`. -```bash -poetry install --with examples --extras unstructured -``` - -Then run: -```bash -poetry run python app.py -``` - -If all dependencies are managed manually rather than using poetry, you can alternatively use: -```bash -python app.py -``` - -To run the Streamlit UI, run: -```bash -streamlit run ui/server.py --server.port 8501 --server.address 0.0.0.0 -``` - -### Querying the pipeline - -To query the pipeline, you can call the REST API: - -```bash -curl --data '{ - "user": "user", - "query": "What are the trends of coal imports?" 
-}' http://localhost:8080/ | jq -``` - -or access the Streamlit UI at `0.0.0.0:8501`. diff --git a/examples/pipelines/contextful_parsing/__init__.py b/examples/pipelines/contextful_parsing/__init__.py deleted file mode 100644 index 0565668..0000000 --- a/examples/pipelines/contextful_parsing/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .app import run - -__all__ = ["run"] diff --git a/examples/pipelines/contextful_parsing/app.py b/examples/pipelines/contextful_parsing/app.py deleted file mode 100755 index 08d1f96..0000000 --- a/examples/pipelines/contextful_parsing/app.py +++ /dev/null @@ -1,129 +0,0 @@ -""" -Microservice for a context-aware ChatGPT assistant. - -The following program reads in a collection of documents, -embeds each document using the OpenAI document embedding model, -then builds an index for fast retrieval of documents relevant to a question, -effectively replacing a vector database. - -The program then starts a REST API endpoint serving queries about programming in Pathway. - -Each query text is first turned into a vector using OpenAI embedding service, -then relevant documentation pages are found using a Nearest Neighbor index computed -for documents in the corpus. A prompt is built from the relevant documentations pages -and sent to the OpenAI GPT-4 chat service for processing. - -Please check the README.md in this directory for how-to-run instructions. -""" - -import os - -import dotenv -import pathway as pw -from pathway.stdlib.ml.index import KNNIndex -from pathway.xpacks.llm.embedders import OpenAIEmbedder -from pathway.xpacks.llm.llms import OpenAIChat, prompt_chat_single_qa -from pathway.xpacks.llm.parsers import ParseUnstructured -from pathway.xpacks.llm.splitters import TokenCountSplitter - -# To use advanced features with Pathway Scale, get your free license key from -# https://pathway.com/features and paste it below. -# To use Pathway Community, comment out the line below. 
-pw.set_license_key("demo-license-key-with-telemetry") - -dotenv.load_dotenv() - - -class QueryInputSchema(pw.Schema): - query: str - user: str - - -def run( - *, - data_dir: str = os.environ.get("PATHWAY_DATA_DIR", "./data/"), - api_key: str = os.environ.get("OPENAI_API_KEY", ""), - host: str = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "0.0.0.0"), - port: int = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", "8080")), - embedder_locator: str = "text-embedding-ada-002", - embedding_dimension: int = 1536, - model_locator: str = "gpt-3.5-turbo", - max_tokens: int = 300, - temperature: float = 0.0, - **kwargs, -): - embedder = OpenAIEmbedder( - api_key=api_key, - model=embedder_locator, - retry_strategy=pw.asynchronous.FixedDelayRetryStrategy(), - cache_strategy=pw.asynchronous.DefaultCache(), - ) - - files = pw.io.fs.read( - data_dir, - mode="streaming", - format="binary", - autocommit_duration_ms=50, - ) - parser = ParseUnstructured() - documents = files.select(texts=parser(pw.this.data)) - documents = documents.flatten(pw.this.texts) - documents = documents.select(texts=pw.this.texts[0]) - - splitter = TokenCountSplitter() - documents = documents.select(chunks=splitter(pw.this.texts)) - documents = documents.flatten(pw.this.chunks) - documents = documents.select(chunk=pw.this.chunks[0]) - - enriched_documents = documents + documents.select(vector=embedder(pw.this.chunk)) - - index = KNNIndex( - enriched_documents.vector, enriched_documents, n_dimensions=embedding_dimension - ) - - query, response_writer = pw.io.http.rest_connector( - host=host, - port=port, - schema=QueryInputSchema, - autocommit_duration_ms=50, - delete_completed_queries=True, - ) - - query += query.select( - vector=embedder(pw.this.query), - ) - - query_context = query + index.get_nearest_items( - query.vector, k=3, collapse_rows=True - ).select(documents_list=pw.this.chunk) - - @pw.udf - def build_prompt(documents, query): - docs_str = "\n".join(documents) - prompt = f"Given the following 
documents : \n {docs_str} \nanswer this query: {query}" - return prompt - - prompt = query_context.select( - prompt=build_prompt(pw.this.documents_list, pw.this.query) - ) - - model = OpenAIChat( - api_key=api_key, - model=model_locator, - temperature=temperature, - max_tokens=max_tokens, - retry_strategy=pw.asynchronous.FixedDelayRetryStrategy(), - cache_strategy=pw.asynchronous.DefaultCache(), - ) - - responses = prompt.select( - query_id=pw.this.id, result=model(prompt_chat_single_qa(pw.this.prompt)) - ) - - response_writer(responses) - - pw.run() - - -if __name__ == "__main__": - run() diff --git a/examples/pipelines/contextful_parsing/data/20230203_alphabet_10K.pdf b/examples/pipelines/contextful_parsing/data/20230203_alphabet_10K.pdf deleted file mode 100644 index 9a1ec66..0000000 Binary files a/examples/pipelines/contextful_parsing/data/20230203_alphabet_10K.pdf and /dev/null differ diff --git a/examples/pipelines/contextful_parsing/data/Energy_Trends_June_2023.pdf b/examples/pipelines/contextful_parsing/data/Energy_Trends_June_2023.pdf deleted file mode 100644 index 79bad02..0000000 Binary files a/examples/pipelines/contextful_parsing/data/Energy_Trends_June_2023.pdf and /dev/null differ diff --git a/examples/pipelines/contextful_parsing/docker-compose.yml b/examples/pipelines/contextful_parsing/docker-compose.yml deleted file mode 100644 index 20a3924..0000000 --- a/examples/pipelines/contextful_parsing/docker-compose.yml +++ /dev/null @@ -1,21 +0,0 @@ -version: "3.8" -services: - pathway: - build: - context: . 
- ports: - - "8080:8080" - environment: - OPENAI_API_KEY: - PATHWAY_PERSISTENT_STORAGE: - volumes: - - "./data:/app/data" - streamlit_ui: - depends_on: - - pathway - build: - context: ./ui - ports: - - "8501:8501" - environment: - PATHWAY_REST_CONNECTOR_HOST: "pathway" diff --git a/examples/pipelines/contextful_parsing/ui/Dockerfile b/examples/pipelines/contextful_parsing/ui/Dockerfile deleted file mode 100644 index 78e2121..0000000 --- a/examples/pipelines/contextful_parsing/ui/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM python:3.11 - -WORKDIR /app - -RUN pip install streamlit python-dotenv - -COPY . . - -EXPOSE 8501 - -CMD ["streamlit", "run", "server.py", "--server.port", "8501", "--server.address", "0.0.0.0"] diff --git a/examples/pipelines/contextful_parsing/ui/server.py b/examples/pipelines/contextful_parsing/ui/server.py deleted file mode 100644 index f32134f..0000000 --- a/examples/pipelines/contextful_parsing/ui/server.py +++ /dev/null @@ -1,51 +0,0 @@ -import os - -import requests -import streamlit as st -from dotenv import load_dotenv - -with st.sidebar: - st.markdown( - "[View the source code on GitHub](https://github.com/pathwaycom/llm-app)" - ) - -# Load environment variables -load_dotenv() -api_host = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "127.0.0.1") -api_port = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", 8080)) -data_path = "../../../../examples/data/finance/" - -# Streamlit UI elements -st.title("LLM App") - -# Initialize chat history -if "messages" not in st.session_state: - st.session_state.messages = [] - -# Display chat messages from history on app rerun -for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - - -# React to user input -if prompt := st.chat_input("How can I help you today?"): - # Display user message in chat message container - with st.chat_message("user"): - st.markdown(prompt) - - # Add user message to chat history - 
st.session_state.messages.append({"role": "user", "content": prompt}) - - url = f"http://{api_host}:{api_port}/" - data = {"query": prompt, "user": "user"} - - response = requests.post(url, json=data) - - if response.status_code == 200: - response = response.json() - with st.chat_message("assistant"): - st.markdown(response) - st.session_state.messages.append({"role": "assistant", "content": response}) - else: - st.error(f"Failed to send data. Status code: {response.status_code}") diff --git a/examples/pipelines/contextful_s3/Dockerfile b/examples/pipelines/contextful_s3/Dockerfile deleted file mode 100644 index a9b4776..0000000 --- a/examples/pipelines/contextful_s3/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM pathwaycom/pathway:latest -WORKDIR /app -COPY . . -EXPOSE 8080 - -CMD ["python", "app.py"] diff --git a/examples/pipelines/contextful_s3/README.md b/examples/pipelines/contextful_s3/README.md deleted file mode 100644 index 97e39bb..0000000 --- a/examples/pipelines/contextful_s3/README.md +++ /dev/null @@ -1,73 +0,0 @@ -

- - GCP Logo Deploy with GCP - | - - Render Logo Deploy with Render - -

- -# RAG pipeline with up-to-date knowledge: get answers based on documents stored in S3 - -This example implements a simple pipeline that answers questions based on documents stored in S3. - -Each query text is first turned into a vector using OpenAI embedding service, -then relevant documentation pages are found using a Nearest Neighbor index computed -for documents in the corpus. A prompt is built from the relevant documentations pages -and sent to the OpenAI chat service for processing. - -## How to run the project - -### Setup environment: -Set your env variables in the .env file placed in this directory. - -```bash -OPENAI_API_KEY=sk-... -PATHWAY_PERSISTENT_STORAGE= # Set this variable if you want to use caching -``` -### Run with Docker - -To run jointly the Alert pipeline and a simple UI execute: - -```bash -docker compose up --build -``` - -Then, the UI will run at http://0.0.0.0:8501 by default. You can access it by following this URL in your web browser. - -### Run manually - -Alternatively, you can run each service separately. - -Make sure you have installed poetry dependencies. -```bash -poetry install --with examples -``` - -Then run: -```bash -poetry run python app.py -``` - -If all dependencies are managed manually rather than using poetry, you can alternatively use: -```bash -python app.py -``` - -To run the Streamlit UI, run: -```bash -streamlit run ui/server.py --server.port 8501 --server.address 0.0.0.0 -``` - -### Querying the pipeline - -To query the pipeline, you can call the REST API: - -```bash -curl --data '{ - "user": "user", - "query": "How to connect to Kafka in Pathway?" -}' http://localhost:8080/ | jq -``` - -or access the Streamlit UI at `0.0.0.0:8501`. 
diff --git a/examples/pipelines/contextful_s3/__init__.py b/examples/pipelines/contextful_s3/__init__.py deleted file mode 100644 index 0565668..0000000 --- a/examples/pipelines/contextful_s3/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .app import run - -__all__ = ["run"] diff --git a/examples/pipelines/contextful_s3/app.py b/examples/pipelines/contextful_s3/app.py deleted file mode 100644 index be9b8b2..0000000 --- a/examples/pipelines/contextful_s3/app.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Microservice for a context-aware ChatGPT assistant. - -The following program reads in a collection of documents from a public AWS S3 bucket, -embeds each document using the OpenAI document embedding model, -then builds an index for fast retrieval of documents relevant to a question, -effectively replacing a vector database. - -The program then starts a REST API endpoint serving queries about programming in Pathway. - -Each query text is first turned into a vector using OpenAI embedding service, -then relevant documentation pages are found using a Nearest Neighbor index computed -for documents in the corpus. A prompt is built from the relevant documentations pages -and sent to the OpenAI chat service for processing. - -Please check the README.md in this directory for how-to-run instructions. -""" - -import os - -import dotenv -import pathway as pw -from pathway.stdlib.ml.index import KNNIndex -from pathway.xpacks.llm.embedders import OpenAIEmbedder -from pathway.xpacks.llm.llms import OpenAIChat, prompt_chat_single_qa - -# To use advanced features with Pathway Scale, get your free license key from -# https://pathway.com/features and paste it below. -# To use Pathway Community, comment out the line below. 
pw.set_license_key("demo-license-key-with-telemetry")

# Load variables from a local .env file first: the defaults in the signature
# of `run` read os.environ once, at the moment the function is defined.
dotenv.load_dotenv()


class DocumentInputSchema(pw.Schema):
    # Text of one document from the corpus; this column is embedded and indexed.
    doc: str


class QueryInputSchema(pw.Schema):
    # Question text; embedded and matched against the document index.
    query: str
    # Sent by clients alongside the query; not read anywhere in this pipeline.
    user: str


def run(
    *,
    data_dir: str = os.environ.get("PATHWAY_DATA_DIR", "llm_demo/data/"),
    api_key: str = os.environ.get("OPENAI_API_KEY", ""),
    host: str = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "0.0.0.0"),
    port: int = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", "8080")),
    embedder_locator: str = "text-embedding-ada-002",
    embedding_dimension: int = 1536,
    model_locator: str = "gpt-3.5-turbo",
    max_tokens: int = 60,
    temperature: float = 0.0,
    **kwargs,
):
    """Build the S3-backed question-answering pipeline and start it.

    Documents are read from the ``pathway-examples`` bucket, embedded and
    indexed; every query received over REST is embedded, matched against the
    index, turned into a prompt and answered by the chat model.

    Args:
        data_dir: Path passed to ``pw.io.s3.read`` inside the bucket.
        api_key: OpenAI API key used by both the embedder and the chat model.
        host: Address the REST connector binds to.
        port: Port the REST connector binds to.
        embedder_locator: Name of the OpenAI embedding model.
        embedding_dimension: ``n_dimensions`` given to ``KNNIndex``.
            NOTE(review): presumably must equal the vector size produced by
            ``embedder_locator`` - confirm when changing either value.
        model_locator: Name of the OpenAI chat model.
        max_tokens: ``max_tokens`` forwarded to the chat model.
        temperature: ``temperature`` forwarded to the chat model.
        **kwargs: Accepted and ignored.

    The function ends by calling ``pw.run()``.
    """
    # Both OpenAI wrappers take their retry/cache strategies from `pw.udfs`.
    # The original mixed `pw.asynchronous` (embedder) with `pw.udfs` (chat
    # model) inside this same function.
    embedder = OpenAIEmbedder(
        api_key=api_key,
        model=embedder_locator,
        retry_strategy=pw.udfs.FixedDelayRetryStrategy(),
        cache_strategy=pw.udfs.DefaultCache(),
    )

    documents = pw.io.s3.read(
        data_dir,
        aws_s3_settings=pw.io.s3.AwsS3Settings(
            bucket_name="pathway-examples",
            region="eu-central-1",
        ),
        format="json",
        schema=DocumentInputSchema,
        mode="streaming",
    )

    # Attach an embedding vector to every document row.
    enriched_documents = documents + documents.select(vector=embedder(pw.this.doc))

    index = KNNIndex(
        enriched_documents.vector, enriched_documents, n_dimensions=embedding_dimension
    )

    query, response_writer = pw.io.http.rest_connector(
        host=host,
        port=port,
        schema=QueryInputSchema,
        autocommit_duration_ms=50,
        delete_completed_queries=True,
    )

    # Attach an embedding vector to every incoming query.
    query += query.select(vector=embedder(pw.this.query))

    # For each query collect the `doc` column of its 3 nearest documents.
    query_context = query + index.get_nearest_items(
        query.vector, k=3, collapse_rows=True
    ).select(documents_list=pw.this.doc)

    @pw.udf
    def build_prompt(documents, query):
        # Join the retrieved documents and put them in front of the question.
        docs_str = "\n".join(documents)
        prompt = f"Given the following documents : \n {docs_str} \nanswer this query: {query}"
        return prompt

    prompt = query_context.select(
        prompt=build_prompt(pw.this.documents_list, pw.this.query)
    )

    model = OpenAIChat(
        api_key=api_key,
        model=model_locator,
        temperature=temperature,
        max_tokens=max_tokens,
        retry_strategy=pw.udfs.FixedDelayRetryStrategy(),
        cache_strategy=pw.udfs.DefaultCache(),
    )

    responses = prompt.select(
        query_id=pw.this.id, result=model(prompt_chat_single_qa(pw.this.prompt))
    )

    response_writer(responses)

    pw.run()


if __name__ == "__main__":
    run()
import os

import requests
import streamlit as st
from dotenv import load_dotenv

# Upper bound, in seconds, on waiting for the pipeline to answer. Without a
# timeout `requests.post` can block indefinitely when the backend stalls.
REQUEST_TIMEOUT_S = 120

with st.sidebar:
    st.markdown(
        "[View the source code on GitHub](https://github.com/pathwaycom/llm-app)"
    )

# Load environment variables
load_dotenv()
api_host = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "127.0.0.1")
api_port = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", 8080))


# Streamlit UI elements
st.title("LLM App")


# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])


# React to user input
if prompt := st.chat_input("How can I help you today?"):
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)

    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    url = f"http://{api_host}:{api_port}/"
    data = {"query": prompt, "user": "user"}

    try:
        response = requests.post(url, json=data, timeout=REQUEST_TIMEOUT_S)
    except requests.exceptions.RequestException as exc:
        # Connection refused, DNS failure, timeout, ...: report it in the UI
        # instead of letting the script die with a traceback.
        st.error(f"Failed to send data. Could not reach {url}: {exc}")
    else:
        if response.status_code == 200:
            answer = response.json()
            with st.chat_message("assistant"):
                st.markdown(answer)
            # Keep the answer so it is re-rendered on the next rerun.
            st.session_state.messages.append({"role": "assistant", "content": answer})
        else:
            st.error(f"Failed to send data. Status code: {response.status_code}")

- - GCP Logo Deploy with GCP - | - - Render Logo Deploy with Render - -

- -# Contextless Pipeline - -This example implements a pipeline that answers a single question, without any context. - -## How to run the project - -### Setup environment: -Set your env variables in the .env file placed in this directory. - -```bash -OPENAI_API_KEY=sk-... -PATHWAY_PERSISTENT_STORAGE= # Set this variable if you want to use caching -``` - -### Run with Docker - -To run the pipeline together with a simple UI, execute: - -```bash -docker compose up --build -``` - -Then, the UI will run at http://0.0.0.0:8501 by default. You can access it by following this URL in your web browser. - -The `docker-compose.yml` file declares a [volume bind mount](https://docs.docker.com/reference/cli/docker/container/run/#volume) that makes changes to files under `data/` made on your host computer visible inside the docker container. The files in `data/live` are indexed by the pipeline - you can paste new files there and they will impact the computations. - -### Run manually - -Alternatively, you can run each service separately. - -Make sure you have installed poetry dependencies with `--extras unstructured`. -```bash -poetry install --with examples --extras unstructured -``` - -Then run: -```bash -poetry run python app.py -``` - -If all dependencies are managed manually rather than using poetry, you can alternatively use: -```bash -python app.py -``` - -To run the Streamlit UI, run: -```bash -streamlit run ui/server.py --server.port 8501 --server.address 0.0.0.0 -``` - -### Querying the pipeline - -To query the pipeline, you can call the REST API: - -```bash -curl --data '{ - "user": "user", - "query": "How to connect to Kafka in Pathway?" -}' http://localhost:8080/ | jq -``` - -or access the Streamlit UI at `0.0.0.0:8501`. 
diff --git a/examples/pipelines/contextless/__init__.py b/examples/pipelines/contextless/__init__.py deleted file mode 100644 index 0565668..0000000 --- a/examples/pipelines/contextless/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .app import run - -__all__ = ["run"] diff --git a/examples/pipelines/contextless/app.py b/examples/pipelines/contextless/app.py deleted file mode 100755 index fbb07e9..0000000 --- a/examples/pipelines/contextless/app.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -REST Microservice implementing a simple, contextless Chatbot. - -The program responds to each query by directly forwarding it to the OpenAI API. - -Please check the README.md in this directory for how-to-run instructions. -""" - -import os - -import dotenv -import pathway as pw -from pathway.xpacks.llm.llms import OpenAIChat, prompt_chat_single_qa - -# To use advanced features with Pathway Scale, get your free license key from -# https://pathway.com/features and paste it below. -# To use Pathway Community, comment out the line below. 
pw.set_license_key("demo-license-key-with-telemetry")

# Load variables from a local .env file first: the defaults in the signature
# of `run` read os.environ once, at the moment the function is defined.
dotenv.load_dotenv()


class QueryInputSchema(pw.Schema):
    # Question text forwarded to the chat model.
    query: str
    # Sent by clients alongside the query; not read anywhere in this pipeline.
    user: str


def run(
    *,
    api_key: str = os.environ.get("OPENAI_API_KEY", ""),
    host: str = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "0.0.0.0"),
    port: int = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", "8080")),
    model_locator: str = "gpt-3.5-turbo",
    max_tokens: int = 60,
    temperature: float = 0.8,
    **kwargs,
):
    """Start a REST endpoint that forwards each query to the OpenAI chat model.

    Args:
        api_key: OpenAI API key.
        host: Address the REST connector binds to.
        port: Port the REST connector binds to.
        model_locator: Name of the OpenAI chat model.
        max_tokens: ``max_tokens`` forwarded to the chat model.
        temperature: ``temperature`` forwarded to the chat model.
        **kwargs: Accepted and ignored.

    The function ends by calling ``pw.run()``.
    """
    query, response_writer = pw.io.http.rest_connector(
        host=host,
        port=port,
        schema=QueryInputSchema,
        autocommit_duration_ms=50,
        delete_completed_queries=True,
    )

    # Retry/cache strategies come from `pw.udfs`, the namespace the sibling
    # S3 pipeline uses for its chat model, instead of `pw.asynchronous`.
    model = OpenAIChat(
        model=model_locator,
        api_key=api_key,
        temperature=temperature,
        max_tokens=max_tokens,
        retry_strategy=pw.udfs.FixedDelayRetryStrategy(),
        cache_strategy=pw.udfs.DefaultCache(),
    )

    # The raw query text is the whole prompt: no retrieval, no context.
    responses = query.select(
        query_id=pw.this.id,
        result=model(prompt_chat_single_qa(pw.this.query)),
    )

    response_writer(responses)

    pw.run()


if __name__ == "__main__":
    run()
import os

import requests
import streamlit as st
from dotenv import load_dotenv

# Upper bound, in seconds, on waiting for the pipeline to answer. Without a
# timeout `requests.post` can block indefinitely when the backend stalls.
REQUEST_TIMEOUT_S = 120

with st.sidebar:
    st.markdown(
        "[View the source code on GitHub](https://github.com/pathwaycom/llm-app)"
    )

# Load environment variables
load_dotenv()
api_host = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "127.0.0.1")
api_port = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", 8080))


# Streamlit UI elements
st.title("LLM App")


# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])


# React to user input
if prompt := st.chat_input("How can I help you today?"):
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)

    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    url = f"http://{api_host}:{api_port}/"
    data = {"query": prompt, "user": "user"}

    try:
        response = requests.post(url, json=data, timeout=REQUEST_TIMEOUT_S)
    except requests.exceptions.RequestException as exc:
        # Connection refused, DNS failure, timeout, ...: report it in the UI
        # instead of letting the script die with a traceback.
        st.error(f"Failed to send data. Could not reach {url}: {exc}")
    else:
        if response.status_code == 200:
            answer = response.json()
            with st.chat_message("assistant"):
                st.markdown(answer)
            # Keep the answer so it is re-rendered on the next rerun.
            st.session_state.messages.append({"role": "assistant", "content": answer})
        else:
            st.error(f"Failed to send data. Status code: {response.status_code}")

- - GCP Logo Deploy with GCP - | - - Render Logo Deploy with Render - -

- -# RAG pipeline run locally with up-to-date knowledge: get answers based on documents stored locally - -This pipeline is similar to the [contextful pipeline](../contextful/), but relies on local computations, rather than querying an external API. To do that it uses [HuggingFace](https://huggingface.co/) for the chat model and [Sentence Transformers](https://www.sbert.net/) for the embedding model. - -## How to run the project - -### Setup environment: -Set your env variables in the .env file placed in this directory. - -```bash -PATHWAY_DATA_DIR= # If unset, defaults to ./data/. If running with Docker, when you change this variable you may need to change the volume mount. -PATHWAY_PERSISTENT_STORAGE= # Set this variable if you want to use caching -``` - -### Run with Docker - -To run the pipeline together with a simple UI, execute: - -```bash -docker compose up --build -``` - -Then, the UI will run at http://0.0.0.0:8501 by default. You can access it by following this URL in your web browser. - -The `docker-compose.yml` file declares a [volume bind mount](https://docs.docker.com/reference/cli/docker/container/run/#volume) that makes changes to files under `data/` made on your host computer visible inside the docker container. The files in `data/live` are indexed by the pipeline - you can paste new files there and they will impact the computations. - -### Run manually - -Alternatively, you can run each service separately. - -Make sure you have installed poetry dependencies. 
"""
Microservice for a privacy preserving LLM assistant.

The following program reads in a collection of documents from local directory,
embeds each document using a locally deployed SentenceTransformer,
then builds an index for fast retrieval of documents relevant to a question,
effectively replacing a vector database.

The program then starts a REST API endpoint serving queries about programming in Pathway.

Each query text is first turned into a vector using the SentenceTransformer,
then relevant documentation pages are found using a Nearest Neighbor index computed
for documents in the corpus. A prompt is built from the relevant documentation pages
and run through a local LLM downloaded from the HuggingFace repository.

Because of restrictions of the model you need to be careful about the length of the
prompt with the embedded documents. In this example this is solved by cropping the
prompt to a set length - the query is at the beginning of the prompt, so it won't be
removed, but some parts of the documents may be omitted from the prompt.
Depending on the length of documents and the model you use this may not be necessary or
you can use some more refined method of shortening your prompts.

Please check the README.md in this directory for how-to-run instructions.
"""

import os

import dotenv
import pathway as pw
from pathway.stdlib.ml.index import KNNIndex
from pathway.xpacks.llm.embedders import SentenceTransformerEmbedder
from pathway.xpacks.llm.llms import HFPipelineChat, prompt_chat_single_qa

# To use advanced features with Pathway Scale, get your free license key from
# https://pathway.com/features and paste it below.
# To use Pathway Community, comment out the line below.
pw.set_license_key("demo-license-key-with-telemetry")

# Load variables from a local .env file first: the defaults in the signature
# of `run` read os.environ once, at the moment the function is defined.
dotenv.load_dotenv()


class DocumentInputSchema(pw.Schema):
    # Text of one document from the corpus; this column is embedded and indexed.
    doc: str


class QueryInputSchema(pw.Schema):
    # Question text; embedded and matched against the document index.
    query: str
    # Sent by clients alongside the query; not read anywhere in this pipeline.
    user: str


def run(
    *,
    data_dir: str = os.environ.get("PATHWAY_DATA_DIR", "./data/"),
    host: str = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "0.0.0.0"),
    port: int = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", "8080")),
    model_locator: str = os.environ.get("MODEL", "gpt2"),
    embedder_locator: str = os.environ.get("EMBEDDER", "intfloat/e5-large-v2"),
    max_tokens: int = 60,
    device: str = "cpu",
    max_prompt_length: int = 500,
    **kwargs,
):
    """Build the fully local question-answering pipeline and start it.

    Args:
        data_dir: Directory of JSON Lines files passed to ``pw.io.jsonlines.read``.
        host: Address the REST connector binds to.
        port: Port the REST connector binds to.
        model_locator: Model name given to ``HFPipelineChat``.
        embedder_locator: Model name given to ``SentenceTransformerEmbedder``.
        max_tokens: Forwarded to the chat model as ``max_new_tokens``.
        device: Device string given to both the embedder and the chat model.
        max_prompt_length: Forwarded to ``model.crop_to_max_length``; the
            default of 500 is the value this example previously hard-coded.
        **kwargs: Accepted and ignored.

    The function ends by calling ``pw.run()``.
    """
    embedder = SentenceTransformerEmbedder(model=embedder_locator, device=device)
    # Embed a one-character probe and measure the result to learn the vector
    # size. NOTE(review): `__wrapped__` is presumably the plain callable
    # behind the UDF - confirm against the installed pathway version.
    embedding_dimension = len(embedder.__wrapped__("."))

    documents = pw.io.jsonlines.read(
        data_dir,
        schema=DocumentInputSchema,
        mode="streaming",
        autocommit_duration_ms=50,
    )

    # Attach an embedding vector to every document row.
    enriched_documents = documents + documents.select(vector=embedder(pw.this.doc))

    index = KNNIndex(
        enriched_documents.vector, enriched_documents, n_dimensions=embedding_dimension
    )

    query, response_writer = pw.io.http.rest_connector(
        host=host,
        port=port,
        schema=QueryInputSchema,
        autocommit_duration_ms=50,
        delete_completed_queries=True,
    )

    # Attach an embedding vector to every incoming query.
    query += query.select(
        vector=embedder(pw.this.query),
    )

    # For each query collect the `doc` column of its 3 nearest documents.
    query_context = query + index.get_nearest_items(
        query.vector, k=3, collapse_rows=True
    ).select(documents_list=pw.this.doc)

    @pw.udf
    def build_prompt(documents, query):
        # The query goes first so that cropping the prompt cannot remove it.
        docs_str = "\n".join(documents)
        prompt = f"You are given a query: {query}\n Answer this query based on the following documents: \n {docs_str}"
        return prompt

    prompt = query_context.select(
        prompt=build_prompt(pw.this.documents_list, pw.this.query)
    )

    model = HFPipelineChat(
        model=model_locator,
        device=device,
        return_full_text=False,
        max_new_tokens=max_tokens,
    )

    # Cropping the prompt so that it is short enough for the model. Depending on input documents
    # and chosen model this may not be necessary.
    prompt = prompt.select(
        prompt=model.crop_to_max_length(
            input_string=pw.this.prompt, max_prompt_length=max_prompt_length
        )
    )

    responses = prompt.select(
        query_id=pw.this.id,
        result=model(prompt_chat_single_qa(pw.this.prompt)),
    )

    response_writer(responses)

    pw.run()


if __name__ == "__main__":
    run()
An example usage of this function is also provided.\npw.io.minio.read(path, minio_settings, value_columns, id_columns=None, csv_settings=None, mode='streaming', types=None, default_values=None, autocommit_duration_ms=None, persistent_id=None, debug_data=None, \\*\\*kwargs)\nReads a table from one or several objects in CSV format from S3 bucket in MinIO.\n\nIn case the prefix is specified, and there are several objects lying under this\nprefix, their order is determined according to their modification times: the smaller\nthe modification time is, the earlier the file will be passed to the engine.\n\n\n* Parameters\n\n * path (`str`) \u2013 Path to an object or to a folder of objects in MinIO S3 bucket.\n\n * minio_settings (`MinIOSettings`) \u2013 Connection parameters for the MinIO account and the bucket.\n\n * value_columns (`List`\\[`str`\\]) \u2013 Names of the columns to be extracted from the files.\n\n * id_columns (`Optional`\\[`List`\\[`str`\\]\\]) \u2013 In case the table should have a primary key generated according to\n a subset of its columns, the set of columns should be specified in this field.\n Otherwise, the primary key will be generated randomly.\n\n * csv_settings (`Optional`\\[`CsvParserSettings`\\]) \u2013 The settings for the CSV parser.\n\n * mode (`str`) \u2013 If set to \u201cstreaming\u201d, the engine will wait for the new input files in the bucket, which fall under the path prefix. Set it to \u201cstatic\u201d, it will onlyconsider the available data and ingest all of it in one commit. Default value is\u201dstreaming\u201d.\n\n * types (`Optional`\\[`Dict`\\[`str`, `PathwayType`\\]\\]) \u2013 Dictionary containing the mapping between the columns and the data types (`pw.Type`) of the values of those columns. This parameter is optional, and if not provided the default type is `pw.Type.ANY`.\n\n * default_values (`Optional`\\[`Dict`\\[`str`, `Any`\\]\\]) \u2013 dictionary containing default values for columns replacing\n blank entries. 
The default value of the column must be specified explicitly,\n otherwise there will be no default value.\n\n * autocommit_duration_ms (`Optional`\\[`int`\\]) \u2013 the maximum time between two commits. Every\n autocommit_duration_ms milliseconds, the updates received by the connector are\n committed and pushed into Pathway\u2019s computation graph.\n\n * persistent_id (`Optional`\\[`int`\\]) \u2013 (unstable) An identifier, under which the state of the table will be persisted or `None`, if there is no need to persist the state of this table. When a program restarts, it restores the state for all input tables according to what was saved for their `persistent_id`. This way it\u2019s possible to configure the start of computations from the moment they were terminated last time.\n\n * debug_data \u2013 Static data replacing original one when debug mode is active.\n\n\n\n* Returns\n\n *Table* \u2013 The table read.\n\n\nExample:\n\nConsider that there is a table, which is stored in CSV format in the min.io S3\nbucket. Then, you can use this method in order to connect and acquire its contents.\n\nIt may look as follows:\n\n\n```python\nimport os\nimport pathway as pw\nt = pw.io.minio.read(\n \"animals/\",\n minio_settings=pw.io.minio.MinIOSettings(\n bucket_name=\"datasets\",\n endpoint=\"avv749.stackhero-network.com\",\n access_key=os.environ[\"MINIO_S3_ACCESS_KEY\"],\n secret_access_key=os.environ[\"MINIO_S3_SECRET_ACCESS_KEY\"],\n ),\n value_columns=[\"owner\", \"pet\"],\n)\n```\n"} -{"doc": "This documentation describes the DateTimeNamespace class in the Pathway framework. It contains methods related to DateTimes, which can be accessed using the \"dt\" attribute of an expression. 
The example code demonstrates how to use these methods to create a table with datetime values and then extract the day values from them.\nclass pw.DateTimeNamespace(expression)\n\nA module containing methods related to DateTimes.\nThey can be called using a dt attribute of an expression.\n\nTypical use:\n\n\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\n```\n"} -{"doc": "The `add_duration_in_timezone` function is defined in the pathway framework. It adds a given duration to a datetime value while taking into account the specified timezone. It takes two parameters, `duration` and `timezone` which can be either a `ColumnExpression` or a `Duration` object and a string respectively. The function returns a `DateTimeNaive` or `DateTimeUtc` object depending on the type of object the method was called upon. 
The example code demonstrates how to use the function to add two hours to a datetime value and convert it to a new timezone.\nadd_duration_in_timezone(duration, timezone)\nAdds Duration to DateTime taking into account time zone.\n\n\n* Parameters\n\n * duration (`Union`\\[`ColumnExpression`, `Duration`\\]) \u2013 Duration to be added to DateTime.\n\n * timezone (`Union`\\[`ColumnExpression`, `str`\\]) \u2013 The time zone to perform addition in.\n\n\n\n* Returns\n\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\n\n\nExample:\n\n\n\nCode\n```python\nimport pathway as pw\nimport datetime\nt1 = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-03-26T01:23:00\n 2 | 2023-03-27T01:23:00\n 3 | 2023-10-29T01:23:00\n 4 | 2023-10-30T01:23:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(date=pw.this.date.dt.strptime(fmt=fmt))\nt3 = t2.with_columns(\n new_date=pw.this.date.dt.add_duration_in_timezone(\n datetime.timedelta(hours=2), timezone=\"Europe/Warsaw\"\n ),\n)\npw.debug.compute_and_print(t3, include_id=False)\n```\n::\nResult\n```\ndate | new_date\n2023-03-26 01:23:00 | 2023-03-26 04:23:00\n2023-03-27 01:23:00 | 2023-03-27 03:23:00\n2023-10-29 01:23:00 | 2023-10-29 02:23:00\n2023-10-30 01:23:00 | 2023-10-30 03:23:00\n```\n::\n::\n"} -{"doc": "The `day()` function is part of the pathway framework and is used to extract the day from a given DateTime object. The function returns an integer value representing the day of the month (1 to 31). The documentation also provides an example of how to use the function with a sample code snippet and its output.\nday()\nExtracts day from a DateTime.\n\n\n* Returns\n\n Day as int. 
1 <= day <= 31 (depending on a month)\n\n\nExample:\n\n\n\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1\n 1 | 1974-03-12T00:00:00\n 2 | 2023-03-25T12:00:00\n 3 | 2023-05-15T14:13:00\n'''\n)\ntable_with_datetime = table.select(t1=table.t1.dt.strptime(\"%Y-%m-%dT%H:%M:%S\"))\ntable_with_days = table_with_datetime.select(day=table_with_datetime.t1.dt.day())\npw.debug.compute_and_print(table_with_days, include_id=False)\n```\n::\nResult\n```\nday\n12\n15\n25\n```\n::\n::\n"} -{"doc": "The documentation describes the `days()` function in the Pathway framework which returns the total number of days in a duration. The function takes no parameters and returns an integer value representing the number of days. The example provided demonstrates how to use the function in a table with datetime values. It subtracts two datetime values to obtain a duration and then applies the `days()` function to compute the number of days in the duration. The result is a table with a new column \"days\" containing the number of days represented as integers.\ndays()\nThe total number of days in a Duration.\n\n\n* Returns\n\n Days as int.\n\n\nExample:\n\n\n\nCode\n```python\nimport pathway as pw\ntable = pw.debug.table_from_markdown(\n '''\n | t1 | t2\n 0 | 2023-03-15T00:00:00 | 2023-05-15T10:13:23\n 1 | 2023-04-15T00:00:00 | 2023-05-15T10:00:00\n 2 | 2023-05-01T10:00:00 | 2023-05-15T10:00:00\n 3 | 2023-05-15T10:00:00 | 2023-05-15T09:00:00\n 4 | 2023-05-15T10:00:00 | 2023-05-15T11:00:00\n 5 | 2023-05-16T12:13:00 | 2023-05-15T10:00:00\n 6 | 2024-05-15T14:13:23 | 2023-05-15T10:00:00\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\ntable_with_datetimes = table.select(\n t1=pw.this.t1.dt.strptime(fmt=fmt), t2=pw.this.t2.dt.strptime(fmt=fmt)\n)\ntable_with_diff = table_with_datetimes.select(diff=pw.this.t1 - pw.this.t2)\ntable_with_days = table_with_diff.select(days=pw.this[\"diff\"].dt.days())\npw.debug.compute_and_print(table_with_days, 
include_id=False)\n```\n::\nResult\n```\ndays\n-61\n-30\n-14\n0\n0\n1\n366\n```\n::\n::\n"} -{"doc": "The `floor(duration)` function in Pathway framework truncates a DateTime object to the precision specified by the `duration` argument. The function returns a DateTimeNaive or a DateTimeUtc object depending on the type of the object the method was called on. The function takes one parameter, the `duration` (which can be a `ColumnExpression` or a `Duration`). The documentation also provides an example code snippet to demonstrate how the function can be used to truncate DateTime objects to hours, 10-minute intervals, and 15-second intervals.\nfloor(duration)\nTruncates DateTime to precision specified by duration argument.\n\n\n* Parameters\n\n duration (`Union`\\[`ColumnExpression`, `Duration`\\]) \u2013 truncation precision\n\n\n\n* Returns\n\n DateTimeNaive or DateTimeUtc depending on the type of an object the method was called on\n\n\nExamples:\n\n\n\nCode\n```python\nimport pathway as pw\nimport datetime\nt1 = pw.debug.table_from_markdown(\n '''\n | date\n 1 | 2023-05-15T12:23:12\n 2 | 2023-05-15T12:33:21\n 3 | 2023-05-15T13:20:35\n 4 | 2023-05-15T13:51:41\n'''\n)\nfmt = \"%Y-%m-%dT%H:%M:%S\"\nt2 = t1.select(date=pw.this.date.dt.strptime(fmt=fmt))\nres = t2.with_columns(\n truncated_to_hours=pw.this.date.dt.floor(datetime.timedelta(hours=1)),\n truncated_to_10_min=pw.this.date.dt.floor(datetime.timedelta(minutes=10)),\n truncated_to_15_s=pw.this.date.dt.floor(datetime.timedelta(seconds=15)),\n)\npw.debug.compute_and_print(res, include_id=False)\n```\n::\nResult\n```\ndate | truncated_to_hours | truncated_to_10_min | truncated_to_15_s\n2023-05-15 12:23:12 | 2023-05-15 12:00:00 | 2023-05-15 12:20:00 | 2023-05-15 12:23:00\n2023-05-15 12:33:21 | 2023-05-15 12:00:00 | 2023-05-15 12:30:00 | 2023-05-15 12:33:15\n2023-05-15 13:20:35 | 2023-05-15 13:00:00 | 2023-05-15 13:20:00 | 2023-05-15 13:20:30\n2023-05-15 13:51:41 | 2023-05-15 13:00:00 | 2023-05-15 13:50:00 | 
2023-05-15 13:51:30\n```\n::\n::\n"} \ No newline at end of file diff --git a/examples/pipelines/local/docker-compose.yml b/examples/pipelines/local/docker-compose.yml deleted file mode 100644 index 20a3924..0000000 --- a/examples/pipelines/local/docker-compose.yml +++ /dev/null @@ -1,21 +0,0 @@ -version: "3.8" -services: - pathway: - build: - context: . - ports: - - "8080:8080" - environment: - OPENAI_API_KEY: - PATHWAY_PERSISTENT_STORAGE: - volumes: - - "./data:/app/data" - streamlit_ui: - depends_on: - - pathway - build: - context: ./ui - ports: - - "8501:8501" - environment: - PATHWAY_REST_CONNECTOR_HOST: "pathway" diff --git a/examples/pipelines/local/ui/Dockerfile b/examples/pipelines/local/ui/Dockerfile deleted file mode 100644 index 78e2121..0000000 --- a/examples/pipelines/local/ui/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM python:3.11 - -WORKDIR /app - -RUN pip install streamlit python-dotenv - -COPY . . - -EXPOSE 8501 - -CMD ["streamlit", "run", "server.py", "--server.port", "8501", "--server.address", "0.0.0.0"] diff --git a/examples/pipelines/local/ui/server.py b/examples/pipelines/local/ui/server.py deleted file mode 100644 index a8160fb..0000000 --- a/examples/pipelines/local/ui/server.py +++ /dev/null @@ -1,52 +0,0 @@ -import os - -import requests -import streamlit as st -from dotenv import load_dotenv - -with st.sidebar: - st.markdown( - "[View the source code on GitHub](https://github.com/pathwaycom/llm-app)" - ) - -# Load environment variables -load_dotenv() -api_host = os.environ.get("PATHWAY_REST_CONNECTOR_HOST", "127.0.0.1") -api_port = int(os.environ.get("PATHWAY_REST_CONNECTOR_PORT", 8080)) - - -# Streamlit UI elements -st.title("LLM App") - - -# Initialize chat history -if "messages" not in st.session_state: - st.session_state.messages = [] - -# Display chat messages from history on app rerun -for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - - -# React to user input 
-if prompt := st.chat_input("How can I help you today?"): - # Display user message in chat message container - with st.chat_message("user"): - st.markdown(prompt) - - # Add user message to chat history - st.session_state.messages.append({"role": "user", "content": prompt}) - - url = f"http://{api_host}:{api_port}/" - data = {"query": prompt, "user": "user"} - - response = requests.post(url, json=data) - - if response.status_code == 200: - response = response.json() - with st.chat_message("assistant"): - st.markdown(response) - st.session_state.messages.append({"role": "assistant", "content": response}) - else: - st.error(f"Failed to send data. Status code: {response.status_code}")