Merge pull request #11 from monocle2ai/kshitiz/add_github_actions_test
Added initial tests and github actions using Tox
oi-raanne authored Jul 10, 2024
2 parents 319d026 + d079288 commit 052ef13
Showing 11 changed files with 335 additions and 13 deletions.
28 changes: 28 additions & 0 deletions .github/pull_request_template.md
@@ -0,0 +1,28 @@
## Proposed changes

Describe the big picture of your changes here to communicate to the maintainers why we should accept this pull request. If it fixes a bug or resolves a feature request, be sure to link to that issue.

## Types of changes

What types of changes does your code introduce to this project?
_Put an `x` in the boxes that apply_

- [ ] Bugfix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] Documentation Update (if none of the other choices apply)

## Checklist

_Put an `x` in the boxes that apply. You can also fill these out after creating the PR. If you're unsure about any of them, don't hesitate to ask. We're here to help! This is simply a reminder of what we are going to look for before merging your code._

- [ ] I have read the [CONTRIBUTING](https://github.com/monocle2ai/monocle/blob/main/CONTRIBUTING.md) doc
- [ ] I have signed the CLA
- [ ] Lint and unit tests pass locally with my changes
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have added the necessary documentation (if appropriate)
- [ ] Any dependent changes have been merged and published in downstream modules

## Further comments

If this is a relatively large or complex change, kick off the discussion by explaining why you chose the solution you did and what alternatives you considered, etc...
93 changes: 93 additions & 0 deletions .github/workflows/codeql.yml
@@ -0,0 +1,93 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]
  schedule:
    - cron: '17 14 * * 3'

jobs:
  analyze:
    name: Analyze (${{ matrix.language }})
    # Runner size impacts CodeQL analysis time. To learn more, please see:
    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
    #   - https://gh.io/supported-runners-and-hardware-resources
    #   - https://gh.io/using-larger-runners (GitHub.com only)
    # Consider using larger runners or machines with greater resources for possible analysis time improvements.
    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
    permissions:
      # required for all workflows
      security-events: write

      # required to fetch internal or private CodeQL packs
      packages: read

      # only required for workflows in private repositories
      actions: read
      contents: read

    strategy:
      fail-fast: false
      matrix:
        include:
        - language: python
          build-mode: none
        # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
        # Use `c-cpp` to analyze code written in C, C++ or both
        # Use 'java-kotlin' to analyze code written in Java, Kotlin or both
        # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
        # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
        # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
        # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
    steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v3
      with:
        languages: ${{ matrix.language }}
        build-mode: ${{ matrix.build-mode }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.

        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
        # queries: security-extended,security-and-quality

    # If the analyze step fails for one of the languages you are analyzing with
    # "We were unable to automatically build your code", modify the matrix above
    # to set the build mode to "manual" for that language. Then modify this step
    # to build your code.
    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
    - if: matrix.build-mode == 'manual'
      shell: bash
      run: |
        echo 'If you are using a "manual" build mode for one or more of the' \
          'languages you are analyzing, replace this with the commands to build' \
          'your code, for example:'
        echo ' make bootstrap'
        echo ' make release'
        exit 1
    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v3
      with:
        category: "/language:${{matrix.language}}"
32 changes: 32 additions & 0 deletions .github/workflows/repolinter.yml
@@ -0,0 +1,32 @@


name: Repolinter

on:
  push:
    branches: [ "main" ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ "main" ]
  schedule:
    - cron: '18 8 * * 0'

jobs:
  repolinter:
    name: Run repolinter
    runs-on: ubuntu-latest
    permissions:
      contents: read
      security-events: write
      actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install repolinter
        run: |
          npm install -g repolinter
      - name: Run repolinter
        run: repolinter lint .
        continue-on-error: true
32 changes: 32 additions & 0 deletions .github/workflows/tox.yml
@@ -0,0 +1,32 @@


name: Tox

on:
  push:
    branches: [ "main" ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ "main" ]
  schedule:
    - cron: '18 8 * * 0'

jobs:
  tox:
    name: Run tox
    runs-on: ubuntu-latest
    permissions:
      contents: read
      security-events: write
      actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install tox
        run: |
          python -m pip install --user tox
      - name: Run tox tests
        run: python -m tox -c tox.ini
        continue-on-error: true
3 changes: 3 additions & 0 deletions .gitignore
@@ -10,3 +10,6 @@
 storage
 traces.txt
 dist
+.tox
+.DS_Store
+Pipfile**
1 change: 0 additions & 1 deletion pyproject.toml
@@ -33,7 +33,6 @@ dev = [
     'langchain-chroma==0.1.1',
     'langchain-community==0.2.5',
     'langchain==0.2.5',
-    'haystack-ai==2.2.3',
     'datasets==2.20.0',
     'numpy==1.26.4',
     'types-requests==2.31.0.20240106',
9 changes: 6 additions & 3 deletions src/monocle_apptrace/instrumentor.py
@@ -96,11 +96,14 @@ def setup_monocle_telemetry(
     traceProvider = TracerProvider(resource=resource)
     tracerProviderDefault = trace.get_tracer_provider()
     providerType = type(tracerProviderDefault).__name__
+    isProxyProvider = "Proxy" in providerType
     for processor in span_processors:
         processor.on_start = on_processor_start
-        tracerProviderDefault.add_span_processor(processor)
-        traceProvider.add_span_processor(processor)
-    if "Proxy" in providerType :
+        if not isProxyProvider:
+            tracerProviderDefault.add_span_processor(processor)
+        else :
+            traceProvider.add_span_processor(processor)
+    if isProxyProvider :
         trace.set_tracer_provider(traceProvider)
     instrumentor = MonocleInstrumentor(user_wrapper_methods=wrapper_methods)
     instrumentor.app_name = workflow_name
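The net effect of this change: span processors are attached to the pre-existing global tracer provider when one has already been registered, and to the new `TracerProvider` (which is then installed globally) when the global provider is still OpenTelemetry's proxy placeholder. A minimal standalone sketch of that check, using a hypothetical `processors` list and a console exporter rather than Monocle's actual setup:

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor, ConsoleSpanExporter

# Hypothetical stand-in for the span_processors argument of setup_monocle_telemetry.
processors = [SimpleSpanProcessor(ConsoleSpanExporter())]

new_provider = TracerProvider()
current_provider = trace.get_tracer_provider()

# opentelemetry-api registers a ProxyTracerProvider until an SDK provider is set,
# so "Proxy" in the type name means no real provider has been configured yet.
is_proxy_provider = "Proxy" in type(current_provider).__name__

for processor in processors:
    if is_proxy_provider:
        new_provider.add_span_processor(processor)
    else:
        current_provider.add_span_processor(processor)

if is_proxy_provider:
    trace.set_tracer_provider(new_provider)
```

Note this assumes any previously registered provider is an SDK `TracerProvider` exposing `add_span_processor`.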
88 changes: 88 additions & 0 deletions tests/http_span_exporter.py
@@ -0,0 +1,88 @@
# Copyright (C) Http Inc 2023-2024. All rights reserved

import json
import logging
import os
from typing import Optional, Sequence
import requests
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult

REQUESTS_SUCCESS_STATUS_CODES = (200, 202)

logger = logging.getLogger(__name__)

class HttpSpanExporter(SpanExporter):
    def __init__(
        self,
        endpoint: Optional[str] = None,
        timeout: Optional[int] = None,
        session: Optional[requests.Session] = None,
    ):
        """Http exporter."""
        http_endpoint: str = os.environ["HTTP_INGESTION_ENDPOINT"]
        self.endpoint = endpoint or http_endpoint
        api_key: str = os.environ["HTTP_API_KEY"]

        self.session = session or requests.Session()
        self.session.headers.update(
            {"Content-Type": "application/json", "x-api-key": api_key}
        )
        self._closed = False
        self.timeout = timeout or 10

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        # After the call to Shutdown subsequent calls to Export are
        # not allowed and should return a Failure result
        if self._closed:
            logger.warning("Exporter already shutdown, ignoring batch")
            return SpanExportResult.FAILURE
        if len(spans) == 0:
            return

        span_list = {
            "batch" : []
        }

        # append the batch object with all the spans object
        for span in spans:
            # create a object from serialized span
            obj = json.loads(span.to_json())
            if obj["parent_id"] is None:
                obj["parent_id"] = "None"
            else:
                obj["parent_id"] = remove_0x_from_start(obj["parent_id"])
            if obj["context"] is not None:
                obj["context"]["trace_id"] = remove_0x_from_start(obj["context"]["trace_id"])
                obj["context"]["span_id"] = remove_0x_from_start(obj["context"]["span_id"])
            span_list["batch"].append(obj)

        result = self.session.post(
            url=self.endpoint,
            data=json.dumps(span_list),
            timeout=self.timeout,
        )
        if result.status_code not in REQUESTS_SUCCESS_STATUS_CODES:
            logger.error(
                "Traces cannot be uploaded; status code: %s, message %s",
                result.status_code,
                result.text,
            )
            return SpanExportResult.FAILURE
        return SpanExportResult.SUCCESS

    def shutdown(self) -> None:
        if self._closed:
            logger.warning("Exporter already shutdown, ignoring call")
            return
        self.session.close()
        self._closed = True

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        return True

# only removes the first occurrence of 0x from the string
def remove_0x_from_start(my_str: str):
    if my_str.startswith("0x"):
        return my_str.replace("0x", "", 1)
    return my_str
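For context, a minimal sketch of how this exporter gets wired up in the tests below; the endpoint and API key are the placeholder values from the test `setUp`, not a real ingestion service:

```python
import os

from opentelemetry.sdk.trace.export import BatchSpanProcessor

from http_span_exporter import HttpSpanExporter

# Placeholder values mirroring the test setUp; a real deployment would point
# HTTP_INGESTION_ENDPOINT and HTTP_API_KEY at an actual ingestion service.
os.environ["HTTP_API_KEY"] = "key1"
os.environ["HTTP_INGESTION_ENDPOINT"] = "https://localhost:3000/api/v1/traces"

# With no explicit endpoint argument, the exporter falls back to HTTP_INGESTION_ENDPOINT.
exporter = HttpSpanExporter()
span_processor = BatchSpanProcessor(exporter)
# The tests pass a processor like this via setup_monocle_telemetry(span_processors=[...]).
```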
17 changes: 11 additions & 6 deletions tests/langchain_test.py
@@ -34,6 +34,8 @@
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanProcessor, ConsoleSpanExporter
 
+from http_span_exporter import HttpSpanExporter
+
 logger = logging.getLogger()
 logger.setLevel(logging.DEBUG)
 fileHandler = logging.FileHandler('traces.txt','w')
@@ -81,7 +83,7 @@ def __createChain(self):
 
         embeddings = HuggingFaceEmbeddings(model_id = "multi-qa-mpnet-base-dot-v1")
         my_path = os.path.abspath(os.path.dirname(__file__))
-        model_path = os.path.join(my_path, "../data/coffee_embeddings")
+        model_path = os.path.join(my_path, "./vector_data/coffee_embeddings")
         vectorstore = faiss.FAISS.load_local(model_path, embeddings, allow_dangerous_deserialization = True)
 
         retriever = vectorstore.as_retriever()
@@ -95,7 +97,8 @@ def __createChain(self):
         return rag_chain
 
     def setUp(self):
-        print("setUp")
+        os.environ["HTTP_API_KEY"] = "key1"
+        os.environ["HTTP_INGESTION_ENDPOINT"] = "https://localhost:3000/api/v1/traces"
 
     def tearDown(self) -> None:
         print("cleaning up with teardown")
@@ -114,7 +117,7 @@ def test_llm_chain(self, mock_post):
         context_key = "context_key_1"
         context_value = "context_value_1"
         set_context_properties({context_key: context_value})
-
+        self.chain = self.__createChain()
 
         mock_post.return_value.status_code = 201
@@ -150,14 +153,16 @@ def test_custom_methods(self):
         wrap_method = MagicMock(return_value=3)
         setup_monocle_telemetry(
             workflow_name=app_name,
-            span_processors=[],
+            span_processors=[
+                BatchSpanProcessor(HttpSpanExporter("https://localhost:3000/api/v1/traces"))
+            ],
             wrapper_methods=[
                 WrapperMethod(
                     package="dummy_class",
                     object="DummyClass",
-                    method="invoke",
+                    method="dummy_method",
                     span_name="langchain.workflow",
-                    wrapper=wrap_method),
+                    wrapper=wrap_method()),
 
             ])
         dummy_class_1 = DummyClass()