From ca2a692193307feec4277142f88fd1afd819f140 Mon Sep 17 00:00:00 2001 From: Chris Mattmann Date: Sun, 1 Jan 2023 14:49:18 -0800 Subject: [PATCH] Fix for #375 #382 Adds an option for raw response returning in parser.py --- tika/parser.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tika/parser.py b/tika/parser.py index d9f185e..229c22d 100644 --- a/tika/parser.py +++ b/tika/parser.py @@ -20,7 +20,7 @@ import os import json -def from_file(filename, serverEndpoint=ServerEndpoint, service='all', xmlContent=False, headers=None, config_path=None, requestOptions={}): +def from_file(filename, serverEndpoint=ServerEndpoint, service='all', xmlContent=False, headers=None, config_path=None, requestOptions={}, raw_response=False): ''' Parses a file for metadata and content :param filename: path to file which needs to be parsed or binary file using open(path,'rb') @@ -41,10 +41,13 @@ def from_file(filename, serverEndpoint=ServerEndpoint, service='all', xmlContent else: output = parse1(service, filename, serverEndpoint, services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/xml'}, headers=headers, config_path=config_path, requestOptions=requestOptions) - return _parse(output, service) + if raw_response: + return output + else: + return _parse(output, service) -def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False, headers=None, config_path=None, requestOptions={}): +def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False, headers=None, config_path=None, requestOptions={}, raw_response=False): ''' Parses the content from buffer :param string: Buffer value @@ -63,7 +66,10 @@ def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False, headers else: status, response = callServer('put', serverEndpoint, '/rmeta/xml', string, headers, False, config_path=config_path, requestOptions=requestOptions) - return _parse((status,response)) + if raw_response: + return (status, response) + else: + return _parse((status,response)) def _parse(output, service='all'): '''