Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

There is a validation error #32

Closed
chunyuma opened this issue Aug 22, 2023 · 4 comments
Closed

There is a validation error #32

chunyuma opened this issue Aug 22, 2023 · 4 comments

Comments

@chunyuma
Copy link

Hello,

While I ran the following code:

import sketch
import pandas as pd
sales_data = pd.read_csv("https://gist.githubusercontent.com/bluecoconut/9ce2135aafb5c6ab2dc1d60ac595646e/raw/c93c3500a1f7fae469cba716f09358cfddea6343/sales_demo_with_pii_and_all_states.csv")
sales_data.sketch.ask("What columns might have PII information in them?")

I got the error below:

File /usr/local/anaconda3/envs/mypersonal_env/lib/python3.10/site-packages/pydantic/main.py:159, in BaseModel.__init__(__pydantic_self__, **data)
    157 # `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks
    158 __tracebackhide__ = True
--> 159 __pydantic_self__.__pydantic_validator__.validate_python(data, self_instance=__pydantic_self__)

ValidationError: 1 validation error for Parameters
stop
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.2/v/missing

Could you please help me figure out the reason? Thanks!

@bluecoconut
Copy link
Contributor

Yup! this is now fixed. Was caused by pydantic update having stricter rules. This is now fixed! Sorry about this.

@priyamshah112
Copy link


ValidationError Traceback (most recent call last)
Cell In[5], line 1
----> 1 df.sketch.ask("Is there any PII information in this dataset?")

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/sketch/pandas_extension.py:330, in SketchHelper.ask(self, question, call_display)
329 def ask(self, question, call_display=True):
--> 330 result = call_prompt_on_dataframe(self._obj, ask_from_parts, question=question)
331 if not call_display:
332 return result

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/sketch/pandas_extension.py:180, in call_prompt_on_dataframe(df, prompt, **kwargs)
177 text_to_copy = f"SKETCH ERROR - see print logs for full error"
178 else:
179 # using local version
--> 180 text_to_copy = prompt(**prompt_kwargs)
181 return text_to_copy

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/prompt.py:109, in Prompt.call(self, *args, **kwargs)
107 response = None
108 try:
--> 109 response = resolve(self.execute(*args, **kwargs))
110 except Exception:
111 response = f"{traceback.format_exc()}"

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/prompt.py:99, in Prompt.execute(self, *args, **kwargs)
95 if not isinstance(self, AsyncPrompt) and inspect.iscoroutinefunction(
96 self.function
97 ):
98 return resolve(self.function(*args, **kwargs))
---> 99 return self.function(*args, **kwargs)

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/sketch/pandas_extension.py:264, in ask_from_parts(dfname, column_names, data_types, extras, question, index_col_name)
260 description = get_description_from_parts(
261 column_names, data_types, extras, index_col_name
262 )
263 description = pd.json_normalize(description).to_csv(index=False)
--> 264 return ask_prompt(dfname=dfname, data_description=description, question=question)

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/prompt.py:109, in Prompt.call(self, *args, **kwargs)
107 response = None
108 try:
--> 109 response = resolve(self.execute(*args, **kwargs))
110 except Exception:
111 response = f"{traceback.format_exc()}"

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/prompt.py:98, in Prompt.execute(self, *args, **kwargs)
94 def execute(self, *args, **kwargs):
95 if not isinstance(self, AsyncPrompt) and inspect.iscoroutinefunction(
96 self.function
97 ):
---> 98 return resolve(self.function(*args, **kwargs))
99 return self.function(*args, **kwargs)

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/prompt.py:21, in resolve(obj)
19 def resolve(obj):
20 if inspect.isawaitable(obj):
---> 21 obj = asyncio.run(obj)
22 return obj

File ~/.local/lib/python3.10/site-packages/nest_asyncio.py:31, in _patch_asyncio..run(main, debug)
29 task = asyncio.ensure_future(main)
30 try:
---> 31 return loop.run_until_complete(task)
32 finally:
33 if not task.done():

File ~/.local/lib/python3.10/site-packages/nest_asyncio.py:99, in _patch_loop..run_until_complete(self, future)
96 if not f.done():
97 raise RuntimeError(
98 'Event loop stopped before Future completed.')
---> 99 return f.result()

File ~/anaconda3/envs/ttd/lib/python3.10/asyncio/futures.py:201, in Future.result(self)
199 self.__log_traceback = False
200 if self._exception is not None:
--> 201 raise self._exception.with_traceback(self._exception_tb)
202 return self._result

File ~/anaconda3/envs/ttd/lib/python3.10/asyncio/tasks.py:232, in Task.__step(failed resolving arguments)
228 try:
229 if exc is None:
230 # We use the send method directly, because coroutines
231 # don't have __iter__ and __next__ methods.
--> 232 result = coro.send(None)
233 else:
234 result = coro.throw(exc)

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/prompt.py:176, in Completion.init..function(*prompt_args, **prompt_kwargs)
174 async def function(*prompt_args, **prompt_kwargs):
175 prompt = self.get_prompt(*prompt_args, **prompt_kwargs)
--> 176 backend = self.backend or get_backend('completion')
177 return await backend(prompt, **self.kwargs)

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/backends.py:32, in get_backend(method)
30 backend_env = os.environ.get("LAMBDAPROMPT_BACKEND", None)
31 if backend_env:
---> 32 set_backend(backend_env)
33 if method in backends:
34 return backends[method]

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/backends.py:15, in set_backend(backend_name)
13 backends['completion'] = MPT7BInstructCompletion()
14 elif backend_name == 'StarCoder':
---> 15 backends['completion'] = StarCoderCompletion()
16 elif backend_name == 'StarCoderGGML':
17 backends['completion'] = StarCoderGGMLQuantizedCompletion()

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/backends.py:305, in StarCoderCompletion.init(self, hf_access_token, **kwargs)
303 if not hf_access_token:
304 raise Exception("No HuggingFace access token found (envvar HF_ACCESS_TOKEN))")
--> 305 super().init("bigcode/starcoder", use_auth_token=hf_access_token, load_config=False, **kwargs)

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/backends.py:226, in HuggingFaceBackend.init(self, model_name, torch_dtype, trust_remote_code, use_auth_token, use_device_map, load_config, **param_override)
224 from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
225 torch_dtype = torch_dtype or torch.bfloat16
--> 226 super().init(**param_override)
227 init_kwargs = {
228 "torch_dtype": torch_dtype,
229 "trust_remote_code": trust_remote_code,
230 "use_auth_token": use_auth_token,
231 }
232 if load_config:

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/lambdaprompt/backends.py:46, in Backend.init(self, **param_override)
45 def init(self, **param_override):
---> 46 self.param_override = self.Parameters(**param_override)

File ~/anaconda3/envs/ttd/lib/python3.10/site-packages/pydantic/main.py:171, in BaseModel.init(self, **data)
169 # __tracebackhide__ tells pytest and some other tools to omit this function from tracebacks
170 tracebackhide = True
--> 171 self.pydantic_validator.validate_python(data, self_instance=self)

ValidationError: 1 validation error for Parameters
stop
Field required [type=missing, input_value={}, input_type=dict]
For further information visit https://errors.pydantic.dev/2.6/v/missing

I am still facing the issue with StarCoder. Can you tell me if the fix is specific to some library version or code line?

@priyamshah112
Copy link

As a temporary fix, I have modified line 220 of backends.py as follows:
class HuggingFaceBackend(Backend):
class Parameters(Backend.Parameters):
temperature: float = 0.01
max_new_tokens: int = 500
use_cache: bool = True
do_sample: bool = True
top_p: float = 0.92
top_k: int = 0
repetition_penalty: float = 1.1
stop: Optional[Union[str, List[str]]] = None #code_change: Assigned None

@gilgamesh7
Copy link

Hi team, thank you for the great work !
Unfortunately, this is still happening — are you able to help?
I have put in more details in #36

Thank you team !

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

4 participants