# HTTP file server: serves the current directory as a local website, built on the socket library.
# Usage: python http_file_server.py <port (optional)>
import sys, os, time, traceback, threading
import socket, mimetypes
from ast import literal_eval
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urlparse, parse_qs, unquote
import chardet
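# Note: chardet is the only third-party dependency (pip install chardet);
# all other imports above are from the standard library.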
HEAD_100 = b"HTTP/1.1 100 Continue\n"
HEAD_OK = b"HTTP/1.1 200 OK\n"
HEAD_206 = b"HTTP/1.1 206 Partial Content\n"
HEAD_404 = b"HTTP/1.1 404 Not Found\n"
HEAD_413 = b"HTTP/1.1 413 Payload Too Large\n"
RECV_LENGTH = 1 << 19 # number of bytes sock.recv() reads at a time
CHUNK_SIZE = 1 << 20 # size of each chunk sent (1MB)
SEND_SPEED = 10 # speed limit for large files, in MB/s; non-positive disables the limit
MAX_UPLOAD_SIZE = 1 << 26 # 64MB
MAX_FILE_SIZE = 1 << 25 # 32MB
MAX_WAIT_CONNECTIONS = 128
FLUSH_INTERVAL = 1 # flush the log 1s after each write
HEADER_FLUSH_INTERVAL = 5
LOG_FILE=os.path.join(os.path.split(__file__)[0],"server.log")
LOG_FILE_ERR=os.path.join(os.path.split(__file__)[0],"server_err.log")
LOG_FILE_HEADER=os.path.join(os.path.split(__file__)[0],"request_headers.log")
UPLOAD_PATH=os.path.join(os.path.split(__file__)[0],"uploads")
# Note: cur_address is a module-level global shared by all worker threads,
# so log lines from concurrent requests may show another request's address.
cur_address=(None, None);log_file_reqheader=None
class AutoFlushWrapper: # wrapper that flushes a stream automatically after writes
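    # Each write wakes a daemon thread, which sleeps `interval` seconds and
    # then flushes once, so bursts of writes are coalesced into a single flush.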
def __init__(self,stream,interval=0):
self._stream=stream
self._interval=interval
        self._waiting_for_flush=False # whether a flush is already scheduled
self._condition=threading.Condition()
self._stopped=threading.Event()
flush_thread=threading.Thread(target=self._auto_flush_thread)
flush_thread.daemon=True
flush_thread.start()
def write(self,message):
result=self._stream.write(message)
if not self._waiting_for_flush:
with self._condition:
self._condition.notify_all()
return result
    def _auto_flush_thread(self): # background thread that calls flush()
while True:
with self._condition:
self._condition.wait()
if self._stopped.is_set():
break
self._waiting_for_flush=True
time.sleep(self._interval)
self._stream.flush()
self._waiting_for_flush=False
def stop_auto_flush(self):
        if self._stopped.is_set(): # already stopped
return
self._stopped.set()
with self._condition:
self._condition.notify_all()
def close(self):
self.stop_auto_flush()
        self._stream.close() # close() flushes automatically
    def __getattr__(self,attr):
        # __getattr__ is only reached when normal attribute lookup fails,
        # so simply delegate everything else to the wrapped stream
        return getattr(self._stream,attr)
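# A minimal usage sketch (hypothetical file name):
#   log = AutoFlushWrapper(open("a.log","a",encoding="utf-8"),FLUSH_INTERVAL)
#   log.write("hello\n") # flushed by the background thread ~1s later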
class RedirectedOutput:
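    # Tee-like stream: every write is mirrored to all the given streams; used
    # in main() to send stdout/stderr both to the console and to a log file.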
def __init__(self,*streams):
if not streams:raise ValueError("At least one stream should be provided")
self._streams=streams
def write(self,data):
written=self._streams[0].write(data)
result=written if written is not None else len(data)
for stream in self._streams[1:]:
written=stream.write(data)
result=min(result,written if written is not None else result)
return result
def flush(self):
for stream in self._streams:
stream.flush()
def stop_auto_flush(self):
for stream in self._streams:
if hasattr(stream, "stop_auto_flush"):
stream.stop_auto_flush()
def isatty(self):
return any(stream.isatty() for stream in self._streams)
def close(self):
for stream in self._streams:
stream.close()
def log_addr(*args, sep=" ", file=None, flush=False): # log a line prefixed with the time and the client's IP:port
print(f"{time.asctime()} | {cur_address[0]}:{cur_address[1]}{sep}{sep.join(args)}",
file=file,flush=flush)
def _read_file_helper(head,file,chunk_size,start,end): # generator that reads a file in chunks
    yield head
    try:
        file.seek(start)
        total=0
        while total<end-start:
            size=min(chunk_size,end-start-total)
            data=file.read(size)
            total+=size
            yield data
    finally:
        file.close() # close even if the consumer stops early (e.g. on a connection error)
def _slice_helper(data,size): # yield successive size-byte slices of data
n=len(data)
for i in range(0,n,size):
yield data[i:i+size]
def convert_size(num): # convert a byte count into a human-readable string
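    # e.g. convert_size(1536) -> "1.50KB"; convert_size(3) -> "3.00B"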
units = ["", "K", "M", "G", "T", "P", "E", "Z", "Y"]
for unit in units:
if num < 1024:
return f"{num:.2f}{unit}B"
num /= 1024
return f"{num:.2f}{units[-1]}B"
def split_formdata(data: bytes, boundary: str):
    # split a multipart/form-data body into its parts
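    # A multipart/form-data body looks roughly like this (RFC 7578):
    #   --BOUNDARY\r\n
    #   Content-Disposition: form-data; name="file"; filename="a.txt"\r\n
    #   \r\n
    #   <field bytes>\r\n
    #   --BOUNDARY--\r\n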
boundary = boundary.encode()
idx = None
wrap = b"\r\n"
slices = []
while idx is None or idx < len(data):
result = data.find(boundary, idx)
if result == -1:return
elif idx is not None:
            slices.append((idx, result-(len(wrap)+2))) # each boundary is preceded by b"\r\n--"
idx = data.find(wrap, result+len(boundary)) + len(wrap)
for item in slices:
yield data[item[0]:item[1]]
def parse_line(line, use_eval = False):
    # helper: parse header values like 'form-data; name="file"' into a type and a dict
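    # e.g. parse_line('form-data; name="file"') -> ("form-data", {"name": '"file"'});
    # with use_eval=True the surrounding quotes are stripped via literal_eval.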
result = {}; type_ = None
for i,item in enumerate(line.split(";")):
item = item.strip()
lst = item.split("=",1) # 解析字符串
if len(lst) < 2:
if i == 0: type_ = item
continue
value = lst[1]
if use_eval:value = literal_eval(value)
result[lst[0]] = value
return type_, result
def get_mimetype(path):
    mimetypes.types_map[".js"]="application/javascript" # make sure .js is served as JavaScript
    mime_type=mimetypes.guess_type(path)[0]
    if mime_type=="text/plain":
        # prefer the type registered for this extension over a generic text/plain
        mime_type=mimetypes.types_map.get(os.path.splitext(path)[1],"text/plain")
    return mime_type
def check_filetype(path): # inspect the file and return a Content-Type header line
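    # Note: chardet.detect() returns a dict like {"encoding": "utf-8",
    # "confidence": 0.99}; a charset is appended only when confidence > 0.9.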
mime_type=get_mimetype(path)
    if mime_type is None: # unknown type
        return b"" # omit the header and let the browser sniff the type
    if mime_type.lower().startswith("text"):
        with open(path,"rb") as f:
            head=f.read(512) # read the head of the file and detect its encoding
            detected=chardet.detect(head)
            coding=detected["encoding"]
            if coding=="ascii": # nothing multi-byte detected yet; read more and retry
                data=f.read(3072)
                if data:
                    detected=chardet.detect(data)
                    coding=detected["encoding"]
                if coding=="ascii":
                    coding="utf-8" # default to utf-8
        if coding is not None and detected["confidence"]>0.9:
            mime_type+=";charset=%s"%coding
    return b"Content-Type: %s\r\n"%mime_type.encode()
def parse_head(req_head): # parse the path, query and fragment out of the request head
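    # e.g. parse_head('GET /docs/a%20b?x=1#top HTTP/1.1')
    #   -> ("docs/a b", {"x": ["1"]}, "top")
    # Caveat: the URL is unquoted before urlparse(), so an encoded "#" or "?"
    # inside the path would be misparsed; acceptable for this simple server.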
    url = unquote(req_head.split(' ')[1])[1:] # the requested path sits in the first line of the request
    parse_result = urlparse(url)
    direc,query_str,fragment = parse_result.path,\
        parse_result.query,parse_result.fragment
    query = parse_qs(query_str,keep_blank_values=True)
    fragment = fragment or None
    if direc == "": # empty path: use the current directory
        direc="."
    direc=direc.replace("\\","/")
    if direc[-1]=="/": # strip the trailing slash
        direc=direc[:-1]
    return direc,query,fragment
def get_dir_content(direc):
    path = os.path.join(os.getcwd(),direc)
    head = HEAD_OK + b"\r\n" # blank line ends the header block
    response = head + f"""
<html><head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>Index of {direc}</title>
</head><body>
<h1>Index of {direc}</h1>""".encode()
    # collect the names of the files and directories under this path
    subdirs=[] # subdirectory names
    subfiles=[] # file names
    for sub in os.listdir(path):
        # os.listdir() does not distinguish directories from files, so check each entry
        if os.path.isfile(os.path.join(path,sub)): # the entry is a file
            subfiles.append(sub)
        else: # the entry is a directory
            subdirs.append(sub)
    subdirs.sort(key=lambda s:s.lower()) # case-insensitive ascending sort
    subfiles.sort(key=lambda s:s.lower())
    if direc != ".":
        response += f'\n<p><a href="/{direc}/..">[Parent directory]</a></p>'.encode()
    # list each subdirectory, then each file
    for sub in subdirs:
        response += f'\n<p><a href="/{direc}/{sub}">[{sub}]</a></p>'.encode()
    for sub in subfiles:
        size=convert_size(os.path.getsize(os.path.join(path,sub)))
        response += f'''\n<p><a href="/{direc}/{sub}">{sub}</a>\
<span style="color: #707070;"> {size}</span></p>'''.encode()
    response += b"\n</body></html>"
    return response
def get_file(path,start=None,end=None): # return the file's data as a response
    size = os.path.getsize(path)
    if start is not None or end is not None: # a Range request: reply 206 Partial Content
        start = start or 0
        end = end or size
        head = HEAD_206 + check_filetype(path)
        head += b"Content-Length: %d\r\n" % (end-start)
        # the Content-Range end offset is inclusive, hence end-1
        head += b"Content-Range: bytes %d-%d/%d\r\n\r\n" % (start,end-1,size)
    else:
        start = 0; end = size
        head = HEAD_OK + check_filetype(path) # add the content-type
        # the header block ends with a blank line (\r\n\r\n)
        head += b"Content-Length: %d\r\n\r\n" % size # add the file length
    return _read_file_helper(head,open(path,'rb'),CHUNK_SIZE,start,end) # read the file in chunks
def getcontent(direc,query=None,fragment=None,start=None,end=None): # build the response for the url path direc
    if query is None:
        query = {}
    # convert direc into a filesystem path
    path = os.path.join(os.getcwd(),direc)
    try:
        if ".." in direc.split("/"): # forbid access to parent directories
            raise OSError # raise to fall into the except clause
        if os.path.isdir(path):
            # look for a file named index.*; if found, serve it directly
            file=None
            for f in os.listdir(path):
                if f.split(".")[0].lower()=="index":
                    file = f
                    if f.split(".")[-1].lower() in ("htm","html"): # with several index files, prefer the html one
                        break
            if file is not None:
                path = os.path.join(path,file)
        # build the response data
        if os.path.isfile(path): # path is a file: open and read it
            response = get_file(path,start,end)
        elif os.path.isdir(path): # path is a directory: list its contents
            response = get_dir_content(direc)
        else: # no such file or directory
            # if the .html extension was omitted, look for a matching html file
            # note: for a request of path, path/index.html takes priority over path.html; adjust as needed
            for ext in (".htm",".html"):
                file = path + ext
                if os.path.isfile(file):
                    response = get_file(file,start,end)
                    break
            else:
                raise OSError # treat as an error and fall into the except clause
    except OSError:
        # return a 404 page
        response = HEAD_404 + b"\r\n" + f"""
<html><head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>404</title>
</head><body>
<h1>404 Not Found</h1>
<p>Page /{direc} was not found</p>
<a href="/{direc}/..">Parent directory</a>
<a href="/">Home</a>
</body></html>
""".encode()
    return response
def send_response(sock,response,address):
    # send the response in chunks
    if isinstance(response,bytes):
        response = _slice_helper(response,CHUNK_SIZE)
    total=0
    chunk=next(response)
    sock.sendall(chunk) # sendall() retries until the whole chunk is sent
    begin=time.perf_counter()
    while True:
        size=len(chunk)
        total+=size
        try:
            chunk=next(response)
        except StopIteration:
            break
        else:
            if SEND_SPEED > 0:
                seconds = (total/(1<<20))/SEND_SPEED - \
                          (time.perf_counter() - begin) # expected time minus elapsed time
                if seconds > 0:
                    time.sleep(seconds) # sleep to cap the send rate
            sock.sendall(chunk)
    if SEND_SPEED > 0 and total >= SEND_SPEED*(1<<20) \
            or SEND_SPEED <= 0 and total >= 1<<27: # expected send time over 1s, or over 128MB when unlimited
        log_addr("large response (%s) fully sent" % convert_size(total))
def handle_post(sock,req_head,req_info,content):
    template = """
<html><head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>{title}</title>
</head><body>
<h1>{msg}</h1>
<a href="javascript:void(0);"
onclick="window.history.back();">Back</a>
</body></html>
""" # page template shown after a submission
    length = int(req_info.get('Content-Length',-1))
    if length > MAX_UPLOAD_SIZE:
        log_addr("attempted to submit an oversized form; limit:",convert_size(MAX_UPLOAD_SIZE))
        msg = f"Submission failed: the payload exceeds {convert_size(MAX_UPLOAD_SIZE)}"
        # TODO: the client browser may show a "connection was reset" page here
        return HEAD_413 + b"\r\n" + template.format(title="Submission failed",msg=msg).encode()
    content_type, formdata_info = parse_line(req_info["Content-Type"])
    is_multipart_form = content_type == "multipart/form-data"
    if len(content) < length: # body incomplete: keep receiving
        chunks = []
        received_len = len(content)
        while True:
            new_data = sock.recv(RECV_LENGTH)
            chunks.append(new_data)
            received_len += len(new_data)
            if not new_data or received_len >= length:break
            if received_len > MAX_UPLOAD_SIZE:return HEAD_413 + b"\r\n"
        content += b"".join(chunks)
    if length != -1:content = content[:length] # truncate overly long data
    if is_multipart_form: # multipart form, e.g. file uploads
        form = {}
        for data in split_formdata(content, formdata_info["boundary"]):
            _, info = get_request_info(data, has_head = False)
            # Content-Disposition looks like: form-data; name="file"; filename="\xe5\x9b\xbe.jpg"
            content_type, disposition = parse_line(info["Content-Disposition"], use_eval=True)
            idx = data.find(b"\r\n\r\n")
            data = data[idx + 4:] if idx != -1 else b"" # the payload follows the blank line
            if "filename" in disposition:
                os.makedirs(UPLOAD_PATH,exist_ok=True)
                if len(data) > MAX_FILE_SIZE:
                    log_addr("attempted to upload an oversized file:",disposition["filename"],
                             convert_size(len(data)))
                    title = "Submission failed"
                    msg = f"Submission failed: files larger than {convert_size(MAX_FILE_SIZE)} cannot be uploaded"
                    return HEAD_413 + b"\r\n" + template.format(title=title,msg=msg).encode()
                filename = os.path.join(UPLOAD_PATH,disposition["filename"])
                with open(filename,"wb") as f:
                    f.write(data) # save the uploaded file
                log_addr("uploaded file:",disposition["filename"])
                form[disposition["name"]] = filename
            else:
                try: data = data.decode()
                except UnicodeDecodeError: pass
                form[disposition["name"]] = data
    else:
        if len(content)<length: # the POST body spans several TCP segments
            return HEAD_100 + b"\r\n" # ask the client to keep sending
        else:
            form=parse_qs(content.decode("utf-8"),
                          keep_blank_values=True,encoding="utf-8")
    log_addr("form submitted:",form)
    title = msg = "Submitted successfully"
    return HEAD_OK + b"\r\n" + template.format(title=title,msg=msg).encode()
def get_request_info(data: bytes, has_head = True):
    # parse the header block: the first line goes into req_head, the rest into the dict req_info
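    # e.g. get_request_info(b"GET / HTTP/1.1\r\nHost: a\r\n\r\n")
    #   -> ("GET / HTTP/1.1", {"Host": "a"})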
lines = data.splitlines()
if has_head:
req_head = lines.pop(0).decode("utf-8")
else:
req_head = None
req_info = {}
for line in lines:
        if not line:break # a blank line marks the end of the headers
line = line.decode("utf-8")
lst = line.split(':', 1)
try:
key, value = lst[0].strip(), lst[1].strip()
req_info[key] = value
        except (ValueError, IndexError): # skip lines that are not header fields
pass
return req_head,req_info
def handle_get(req_head,req_info):
url=unquote(req_head.split(' ')[1])
direc,query,fragment=parse_head(req_head)
if "Range" in req_info: # 断点续传
range_=req_info["Range"].split("=",1)[1]
start,end=range_.split("-")
start = int(start) if start else None
end = int(end) if end else None
log_addr("访问URL: %s (从 %s 到 %s 断点续传)" % (url,
convert_size(start) if start is not None else None,
convert_size(end) if end is not None else "末尾"))
return getcontent(direc,query,fragment,start,end)
else:
log_addr("访问URL:",url)
return getcontent(direc,query,fragment) # 获取目录的数据
def handle_client(sock, address): # handle one client request
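    # One request per connection: the socket is closed after the response,
    # so HTTP keep-alive is not supported.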
    raw = sock.recv(RECV_LENGTH)
    if not raw:return # ignore empty data
    req_head,req_info = get_request_info(raw)
    log_addr(f"{req_head!r} {req_info}", file=log_file_reqheader) # log the request header
    # build the response; it may be bytes or a generator
    if req_head.startswith("POST"): # POST request
        # the body starts after the blank line, not necessarily on the last line
        body = raw.split(b"\r\n\r\n",1)[1] if b"\r\n\r\n" in raw else b""
        response=handle_post(sock,req_head,req_info,body)
    else: # GET request
        response=handle_get(req_head,req_info)
    try:send_response(sock,response,address) # send the response to the client in chunks
    except ConnectionError as err:
        log_addr("connection error (%s): %s" % (type(err).__name__,str(err)))
    sock.close() # close the client connection
def handle_client_thread(*args,**kw): # thread wrapper: print the traceback when handling raises
try:handle_client(*args,**kw)
except Exception:
traceback.print_exc()
def main():
    global cur_address, log_file_reqheader
    log_file=AutoFlushWrapper(open(LOG_FILE,"a",encoding="utf-8"),FLUSH_INTERVAL)
    log_file.write("\n") # blank line separating this run from the previous log
    sys.stdout=RedirectedOutput(log_file,sys.stdout) # redirect standard output
    log_file_err=AutoFlushWrapper(open(LOG_FILE_ERR,"a",encoding="utf-8"),
                                  FLUSH_INTERVAL)
    log_file_err.write(f"\n{time.asctime()}:\n")
    sys.stderr=RedirectedOutput(log_file_err,sys.stderr)
    log_file_reqheader=AutoFlushWrapper(open(LOG_FILE_HEADER,"a",encoding="utf-8"),
                                        HEADER_FLUSH_INTERVAL) # log for request headers
    host = socket.gethostname()
    port=int(sys.argv[1]) if len(sys.argv)==2 else 80 # 80 is the default HTTP port
    ips = socket.gethostbyname_ex(host)[2] # alternatively socket.gethostbyname(host)
    print(f"Server started at {time.asctime()}")
    print("Server IP addresses:",ips)
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.bind(("", port))
    sock.listen(MAX_WAIT_CONNECTIONS) # start listening
    # Single-threaded mode: handle one client at a time
    #while True:
    #    client_sock, cur_address = sock.accept()
    #    handle_client(client_sock, cur_address)
    # Multi-threaded mode
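    # os.cpu_count() workers is a fairly small pool for an I/O-bound server;
    # raising max_workers may help when many clients download large
    # (rate-limited) files at the same time.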
with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
try:
while True:
client_sock, cur_address = sock.accept()
executor.submit(handle_client_thread, client_sock, cur_address)
finally:
sock.close()
sys.stdout.flush();sys.stderr.flush()
log_file_reqheader.flush()
if __name__ == "__main__":main()