-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr_r2sl.py
204 lines (165 loc) · 6.84 KB
/
ocr_r2sl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# -*- coding: UTF-8 -*-
import hashlib
import hmac
import string
import datetime
import time
import httplib
import json
AUTHORIZATION = "authorization"
BCE_PREFIX = "x-bce-"
DEFAULT_ENCODING = 'UTF-8'
# 保存AK/SK的类
class BceCredentials(object):
def __init__(self, access_key_id, secret_access_key):
self.access_key_id = access_key_id
self.secret_access_key = secret_access_key
# 根据RFC 3986,除了:
# 1.大小写英文字符
# 2.阿拉伯数字
# 3.点'.'、波浪线'~'、减号'-'以及下划线'_'
# 以外都要编码
RESERVED_CHAR_SET = set(string.ascii_letters + string.digits + '.~-_')
def get_normalized_char(i):
char = chr(i)
if char in RESERVED_CHAR_SET:
return char
else:
return '%%%02X' % i
NORMALIZED_CHAR_LIST = [get_normalized_char(i) for i in range(256)]
# 正规化字符串
def normalize_string(in_str, encoding_slash=True):
if in_str is None:
return ''
# 如果输入是unicode,则先使用UTF8编码之后再编码
in_str = in_str.encode(DEFAULT_ENCODING) if isinstance(in_str, unicode) else str(in_str)
# 在生成规范URI时。不需要对斜杠'/'进行编码,其他情况下都需要
if encoding_slash:
encode_f = lambda c: NORMALIZED_CHAR_LIST[ord(c)]
else:
# 仅仅在生成规范URI时。不需要对斜杠'/'进行编码
encode_f = lambda c: NORMALIZED_CHAR_LIST[ord(c)] if c != '/' else c
# 按照RFC 3986进行编码
return ''.join([encode_f(ch) for ch in in_str])
# 生成规范时间戳
def get_canonical_time(timestamp=0):
# 不使用任何参数调用的时候返回当前时间
if timestamp == 0:
utctime = datetime.datetime.utcnow()
else:
utctime = datetime.datetime.utcfromtimestamp(timestamp)
# 时间戳格式:[year]-[month]-[day]T[hour]:[minute]:[second]Z
return "%04d-%02d-%02dT%02d:%02d:%02dZ" % (
utctime.year, utctime.month, utctime.day,
utctime.hour, utctime.minute, utctime.second)
# 生成规范URI
def get_canonical_uri(path):
# 规范化URI的格式为:/{bucket}/{object},并且要对除了斜杠"/"之外的所有字符编码
return normalize_string(path, False)
# 生成规范query string
def get_canonical_querystring(params):
if params is None:
return ''
# 除了authorization之外,所有的query string全部加入编码
result = ['%s=%s' % (k, normalize_string(v)) for k, v in params.items() if k.lower != AUTHORIZATION]
# 按字典序排序
result.sort()
# 使用&符号连接所有字符串并返回
return '&'.join(result)
# 生成规范header
def get_canonical_headers(headers, headers_to_sign=None):
headers = headers or {}
# 没有指定header_to_sign的情况下,默认使用:
# 1.host
# 2.content-md5
# 3.content-length
# 4.content-type
# 5.所有以x-bce-开头的header项
# 生成规范header
if headers_to_sign is None or len(headers_to_sign) == 0:
headers_to_sign = {"host", "content-md5", "content-length", "content-type"}
# 对于header中的key,去掉前后的空白之后需要转化为小写
# 对于header中的value,转化为str之后去掉前后的空白
f = lambda (key, value): (key.strip().lower(), str(value).strip())
result = []
for k, v in map(f, headers.iteritems()):
# 无论何种情况,以x-bce-开头的header项都需要被添加到规范header中
if k.startswith(BCE_PREFIX) or k in headers_to_sign:
result.append("%s:%s" % (normalize_string(k), normalize_string(v)))
# 按照字典序排序
result.sort()
# 使用\n符号连接所有字符串并返回
return '\n'.join(result)
# 签名主算法
def sign(credentials, http_method, path, headers, params,
timestamp=0, expiration_in_seconds=1800, headers_to_sign=None):
headers = headers or {}
params = params or {}
# 1.生成sign key
# 1.1.生成auth-string,格式为:bce-auth-v1/{accessKeyId}/{timestamp}/{expirationPeriodInSeconds}
sign_key_info = 'bce-auth-v1/%s/%s/%d' % (
credentials.access_key_id,
get_canonical_time(timestamp),
expiration_in_seconds)
# 1.2.使用auth-string加上SK,用SHA-256生成sign key
sign_key = hmac.new(
credentials.secret_access_key,
sign_key_info,
hashlib.sha256).hexdigest()
# 2.生成规范化uri
canonical_uri = get_canonical_uri(path)
# 3.生成规范化query string
canonical_querystring = get_canonical_querystring(params)
# 4.生成规范化header
canonical_headers = get_canonical_headers(headers, headers_to_sign)
# 5.使用'\n'将HTTP METHOD和2、3、4中的结果连接起来,成为一个大字符串
string_to_sign = '\n'.join(
[http_method, canonical_uri, canonical_querystring, canonical_headers])
# 6.使用5中生成的签名串和1中生成的sign key,用SHA-256算法生成签名结果
sign_result = hmac.new(sign_key, string_to_sign, hashlib.sha256).hexdigest()
# 7.拼接最终签名结果串
if headers_to_sign:
# 指定header to sign
result = '%s/%s/%s' % (sign_key_info, ';'.join(headers_to_sign), sign_result)
else:
# 不指定header to sign情况下的默认签名结果串
result = '%s//%s' % (sign_key_info, sign_result)
return result
if __name__ == "__main__":
credentials = BceCredentials("ak","sk") #替换成自己的AK SK
http_method = "POST"
host = 'ocr.bj.baidubce.com'
path = '/v1/recognize/line'
timestamp = int(time.time())
#body = '{"bosPath":"/v1/app/40039/person","language":"CHN_ENG"}' #替换成自己的BOS BucketName和Objectkey
jbody = json.dumps({'bosPath':'your bos path'},separators=(',',':'))
print '\n'
print jbody
print '\n'
headers = {"host": "ocr.bj.baidubce.com",
"accept-encoding":"gzip, deflate",
"connection": "keep-alive",
"accept":" */*",
"content-length": len(jbody),
"content-type":"application/json",
"x-bce-date": get_canonical_time(timestamp)
}
params = {}
result = sign(credentials, http_method, path, headers, params, timestamp)
print result
headers['Authorization'] = result
print headers
port = 80
httpClient = None
try:
httpClient = httplib.HTTPConnection(host,port)
httpClient.request(method=http_method,url=path,body = jbody,headers=headers)
response = httpClient.getresponse()
print response.status
print response.reason
print response.read()
except Exception,e:
print e
finally:
if httpClient:
httpClient.close()