Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solution to assignment 2.2 #70

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
2e73f9a
Solution of assignment 1
Hemant-Chowdhury Aug 19, 2019
1906f2c
changes in doc
Hemant-Chowdhury Aug 19, 2019
202f027
Merge branch 'master' of https://github.com/kaustubh-karkare/project-…
Hemant-Chowdhury Aug 28, 2019
e23f933
Created separate module named argparser
Hemant-Chowdhury Aug 28, 2019
c7c4512
adding documentation in the form Readme.md
Hemant-Chowdhury Aug 28, 2019
5c52f94
Removing unnecessary files
Hemant-Chowdhury Aug 29, 2019
5c1ebc4
Implemented changes requested and added some new features
Hemant-Chowdhury Aug 30, 2019
59b83e8
Implementing changes requested for assignment-1
Hemant-Chowdhury Sep 4, 2019
8b1d7d4
Adding solution for assignment-2
Hemant-Chowdhury Sep 4, 2019
9d3287b
Removing .mypy_cache files
Hemant-Chowdhury Sep 4, 2019
f5a3ec5
Cleaning object and executable files
Hemant-Chowdhury Sep 4, 2019
dcb3720
Minor changes in test.py
Hemant-Chowdhury Sep 4, 2019
ffd85a8
Implemented watch functionality
Hemant-Chowdhury Sep 11, 2019
b106e42
Implementing required changes
Hemant-Chowdhury Oct 9, 2019
c2b7560
Added commit,diff,status,log,reset,checkout functionality
Hemant-Chowdhury Nov 23, 2019
9e07cf9
Removing solution to assignment 2.1 for the time being
Hemant-Chowdhury Nov 23, 2019
7dc9429
minor changes in vcs.py
Hemant-Chowdhury Dec 12, 2019
e805fee
Added unit tests and code enhancement
Hemant-Chowdhury Dec 14, 2019
b16b9d4
cleaning repository
Hemant-Chowdhury Mar 21, 2020
95aeb3d
Added solution to assignment-2.2
Hemant-Chowdhury Apr 1, 2020
db835e0
Removing comments
Hemant-Chowdhury Apr 1, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions solutions/assignment-2.2/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class InvalidUrl(Exception):
    """Error raised when a URL cannot be parsed or downloaded from.

    The message is available both through ``str(error)`` and through the
    ``msg`` attribute.
    """

    def __init__(self, msg):
        """Store *msg* on the instance and hand it to ``Exception``."""
        self.msg = msg
        super().__init__(msg)
38 changes: 38 additions & 0 deletions solutions/assignment-2.2/header.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import Optional
# Line terminator placed between the lines of an HTTP header section.
NEW_LINE = "\r\n"
# Two consecutive terminators (an empty line) end the header section.
HEADER_END = NEW_LINE + NEW_LINE


class RequestHeaderBuilder(object):
    """Builds the textual head (request line plus headers) of an HTTP request."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def create(url_path='/', request_method='GET', http_version='1.0', **kwargs) -> str:
        """Return the request head as a single string.

        Args:
            url_path: Request target placed in the request line.
            request_method: HTTP method placed in the request line.
            http_version: Version placed after ``HTTP/`` in the request line.
            **kwargs: Every extra keyword becomes one ``name: value`` header
                line, in the order given.

        Returns:
            The request line and header lines joined by ``NEW_LINE`` and
            terminated by ``HEADER_END``.
        """
        start_line = f"{request_method} {url_path} HTTP/{http_version}"
        header_lines = [f"{name}: {value}" for name, value in kwargs.items()]
        return NEW_LINE.join([start_line, *header_lines]) + HEADER_END


class ResponseHeaderParser(object):
    """Parse the status line and header fields of a raw HTTP response head.

    Attributes:
        request_line: First line of the response, e.g. ``HTTP/1.1 200 OK``.
            This is the HTTP status line; the attribute name is kept for
            backward compatibility.
        response_code: Status code as a string, e.g. ``"200"``.
        header_dict: Header fields keyed by name exactly as the server sent
            them; when a name repeats, the last value wins.
    """

    def __init__(self, raw_header: str):
        """Parse *raw_header*.

        Args:
            raw_header: Response text starting at the status line. Anything
                after the first empty line is ignored.

        Raises:
            ValueError: If the first line does not contain a status code.
        """
        super(ResponseHeaderParser, self).__init__()
        # Accept bare LF as well as CRLF so a sloppy server cannot break parsing.
        lines = raw_header.replace("\r\n", "\n").split("\n")
        self.request_line = lines[0]
        status_parts = self.request_line.split(None, 2)
        if len(status_parts) < 2:
            raise ValueError(f"malformed HTTP status line: {self.request_line!r}")
        self.response_code = status_parts[1]
        self.header_dict = dict()
        for line in lines[1:]:
            if not line:
                # The first empty line ends the header section.
                break
            key, separator, value = line.partition(":")
            if not separator:
                # Not a "name: value" field; skip it instead of failing.
                continue
            # Whitespace around the value is optional and not part of it.
            self.header_dict[key.strip()] = value.strip()

    def _get_header(self, name: str) -> Optional[str]:
        """Return the value of header *name* (case-insensitive), or None."""
        wanted = name.lower()
        for key, value in self.header_dict.items():
            if key.lower() == wanted:
                return value
        return None

    def get_filename_from_response(self) -> Optional[str]:
        """Return the ``filename`` parameter of Content-Disposition, if any.

        Quoted, unquoted and RFC 2231 encoded parameters are handled by the
        standard library's header parameter parser.

        Returns:
            The file name, or None when the header or a non-empty filename
            parameter is absent.
        """
        # Local import: this module does not import email at file level.
        from email.message import Message

        content_disposition = self._get_header('Content-Disposition')
        if content_disposition is None:
            return None
        message = Message()
        message['Content-Disposition'] = content_disposition
        return message.get_filename() or None
25 changes: 25 additions & 0 deletions solutions/assignment-2.2/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import re
from exception import InvalidUrl
from typing import Optional

# Groups: 1 = scheme including "://", 2 = host, 3 = ":port", 4 = remainder.
# The scheme group only accepts scheme characters, so a "://" that appears
# later in the URL (e.g. inside a query string) is never taken for the
# scheme separator.
URL_PATTERN = re.compile(r"^([A-Za-z][A-Za-z0-9+.\-]*://)?([A-Za-z0-9\-.]+)(:[0-9]+)?(.*)$")


class UrlParser(object):
    """Split a URL into scheme, host, port and request target.

    Attributes:
        scheme: Lower-cased URL scheme without ``://``.
        host: Host name as written in the URL.
        port: Explicit port, else 80 for ``http`` and 443 for anything else.
        path_with_query: Path plus query string, always starting with ``/``
            and never containing the fragment.
    """

    def __init__(self, url):
        """Parse *url*.

        Raises:
            InvalidUrl: If the URL does not match the expected shape, has no
                scheme, or has unexpected characters after the host/port.
        """
        super(UrlParser, self).__init__()
        url_data = URL_PATTERN.match(url)
        if url_data is None:
            raise InvalidUrl(f'invalid url {url}')
        scheme, host, port, remainder = url_data.groups()
        if scheme is None:
            raise InvalidUrl(f'url scheme is missing in {url}')
        if remainder and remainder[0] not in '/?#':
            # Characters the host pattern rejects (e.g. "_" or "@") would
            # otherwise silently truncate the host name.
            raise InvalidUrl(f'invalid host or port in {url}')
        # Schemes are case-insensitive; normalise so comparisons work.
        self.scheme = scheme[:-3].lower()
        self.host = host
        if port:
            self.port = int(port[1:])
        elif self.scheme == 'http':
            self.port = 80
        else:
            self.port = 443
        # The fragment is client-side only and must not be sent to the server.
        request_target = remainder.split('#', 1)[0]
        if not request_target.startswith('/'):
            # Covers both an empty remainder and "http://host?query".
            request_target = '/' + request_target
        self.path_with_query = request_target

    def get_filename_from_url(self) -> Optional[str]:
        """Return the last path segment of the URL, or None when it is empty."""
        path = self.path_with_query.split('?', 1)[0]
        filename = path.rsplit('/', 1)[-1]
        return filename or None
80 changes: 80 additions & 0 deletions solutions/assignment-2.2/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import unittest
import hashlib
import os
import tempfile
from contextlib import contextmanager
from urlFileDownloader import UrlFileDownloader
from exception import InvalidUrl

# Size, in bytes, of each read performed while hashing a downloaded file.
CHUNK_SIZE = 4 * 1024


@contextmanager
def tempdir(dirname):
    """Make *dirname* the working directory for the duration of the block.

    The working directory that was current on entry is restored on exit,
    whether or not the block raised.
    """
    previous_cwd = os.getcwd()
    os.chdir(dirname)
    try:
        yield
    finally:
        os.chdir(previous_cwd)


class TestUrlFileDownloader(unittest.TestCase):
    """Tests for UrlFileDownloader that download from real URLs."""

    def download_and_match_hash(self, urlFileDownloader: UrlFileDownloader, expected_hash_content: bytes):
        """Download inside a throw-away directory and compare SHA-256 digests.

        The file named by ``urlFileDownloader.download_file`` is hashed in
        ``CHUNK_SIZE`` pieces and the digest is compared with
        *expected_hash_content*.
        """
        digest = hashlib.sha256()
        with tempfile.TemporaryDirectory() as tmpdirname, tempdir(tmpdirname):
            urlFileDownloader.download()
            with open(urlFileDownloader.download_file, 'rb') as file:
                # iter() with a sentinel stops at the first empty read (EOF).
                for data in iter(lambda: file.read(CHUNK_SIZE), b''):
                    digest.update(data)
        self.assertEqual(digest.digest(), expected_hash_content)

    def test_small_txt_file_download_having_https_scheme(self):
        downloader = UrlFileDownloader('https://raw.githubusercontent.com/Hemant-Chowdhury/public_files/master/Test.txt')
        expected = b"}\xd9\x1e\x07\xf04\x16F\xd5?i8'\x8aM>\x87\x96\x1f\xab\xea\x06o~o@\xb79\x8f;\x0b\x0f"
        self.download_and_match_hash(downloader, expected)

    def test_large_txt_file_download(self):
        downloader = UrlFileDownloader('https://raw.githubusercontent.com/Hemant-Chowdhury/public_files/master/10MB.txt')
        expected = b'\xca\x06i\x91X)\xf7w\xb4g5\xe8\xab\xcb\x16\xe2\xc7"Q\x1c\xc8\x9aC\xd4\x02Wzqc\xab\x93l'
        self.download_and_match_hash(downloader, expected)

    def test_txt_file_download_having_http_scheme(self):
        downloader = UrlFileDownloader('http://data.pr4e.org/romeo.txt')
        expected = b'\x92Va\xfbF\x8d\xa9P\x82\x8fD\xe4\xc3|\xde\x02K\xaa\xf8M\xb5\xa3\x15E^\xa8\xec\xb94\x8b\x12\xcf'
        self.download_and_match_hash(downloader, expected)

    def test_pdf_download(self):
        downloader = UrlFileDownloader('http://www.africau.edu/images/default/sample.pdf')
        expected = b'\x8d\xec\xc8W\x19F\xd4\xcdp\xa0$\x94\x9e\x03:**T7\x7f\xe9\xf1\xc1\xb9D\xc2\x0f\x9e\xe1\x1a\x9eQ'
        self.download_and_match_hash(downloader, expected)

    def test_image_download_having_different_filename_in_content_disposition(self):
        downloader = UrlFileDownloader('https://i.picsum.photos/id/145/200/300.jpg')
        expected = b'\xa4\x9b{\xd1J\x13\xc1HlR-\x9c\xa6\xed\x02\x9e;\x93\xa6\xcbp\xb2[\x9dV\x14\xe2\xf6|\x11\x8d\x96'
        self.download_and_match_hash(downloader, expected)

    def test_video_download(self):
        downloader = UrlFileDownloader('https://file-examples.com/wp-content/uploads/2018/04/file_example_MOV_480_700kB.mov')
        expected = b'\x867\xa4G\xa9U\x13?w`\x11\xe9\x06\xf1m\xb3uc\x1c\x9b\xd6\xba;\xbc$\xc0\xcf\x03|o\xab\x07'
        self.download_and_match_hash(downloader, expected)

    def test_invalid_url_having_no_file(self):
        downloader = UrlFileDownloader('https://www.google.com')
        with self.assertRaises(InvalidUrl) as context:
            downloader.download()
        self.assertEqual(str(context.exception), 'valid file/filename not found')

    def test_invalid_url_having_no_url_scheme(self):
        with self.assertRaises(InvalidUrl) as context:
            UrlFileDownloader('data.pr4e.org/romeo.txt')
        self.assertEqual(str(context.exception), 'url scheme is missing in data.pr4e.org/romeo.txt')

    def test_invalid_url_unreachable(self):
        downloader = UrlFileDownloader('http://www.google.com/text/romeo.txt')
        with self.assertRaises(InvalidUrl) as context:
            downloader.download()
        self.assertEqual(str(context.exception), 'url not reachable')


# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
70 changes: 70 additions & 0 deletions solutions/assignment-2.2/urlFileDownloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from parser import UrlParser
from header import RequestHeaderBuilder
from header import ResponseHeaderParser
from header import HEADER_END
from exception import InvalidUrl
import socket
import ssl

# URL scheme for which the TCP connection is wrapped in TLS.
HTTPS = "https"
# Maximum number of bytes requested from the socket per recv() call.
CHUNK_SIZE = 1024
# Shared TLS client context. create_default_context() turns on certificate
# verification and hostname checking and lets the library negotiate the
# protocol version. A bare ssl.SSLContext(ssl.PROTOCOL_TLSv1_2) is deprecated
# and verifies neither the certificate nor the host name.
context = ssl.create_default_context()


class UrlFileDownloader(object):
    """Download the resource behind an http(s) URL into the working directory.

    Attributes:
        url: The parsed URL (a ``UrlParser``).
        download_file: Name of the file written by ``download()``; None until
            a download has determined it.
        download_filename: Alias of ``download_file`` kept for backward
            compatibility.
    """

    def __init__(self, url):
        """Parse *url*; ``UrlParser`` raises ``InvalidUrl`` when it is unusable."""
        super(UrlFileDownloader, self).__init__()
        self.url = UrlParser(url)
        self.download_file = None
        self.download_filename = None

    def download(self):
        """Connect to the host, send a GET request and store the response body.

        Raises:
            InvalidUrl: If the response is empty, is not a 2xx response, or
                no usable file name can be determined.

        Socket and TLS errors are not caught and propagate to the caller.
        """
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            if self.url.scheme == HTTPS:
                with context.wrap_socket(sock, server_hostname=self.url.host) as secure_sock:
                    self._exchange(secure_sock)
            else:
                self._exchange(sock)

    def _exchange(self, sock):
        """Connect *sock* to the target host and run one request/response."""
        sock.connect((self.url.host, self.url.port))
        self.send_request_header(sock)
        self.receive_response(sock)

    def send_request_header(self, sock):
        """Send a ``GET`` request for the parsed URL over *sock*."""
        request_headers = RequestHeaderBuilder.create(
            # The parameter is named request_method; passing method= would be
            # swallowed by **kwargs and sent as a bogus "method: GET" header.
            request_method="GET",
            url_path=self.url.path_with_query,
            http_version="1.1",
            Host=self.url.host,
            Connection="close")
        sock.sendall(request_headers.encode())

    @staticmethod
    def _safe_filename(filename):
        """Reduce a server- or URL-supplied name to a bare file name, or None."""
        # Local import: this module does not import os at file level.
        import os

        if not filename:
            return None
        # Drop any directory part (either separator style) so the server
        # cannot steer the write outside the working directory.
        name = os.path.basename(filename.replace('\\', '/'))
        if name in ('', '.', '..'):
            return None
        return name

    def receive_response(self, sock):
        """Read the response from *sock* and write its body to a file.

        The file name comes from Content-Disposition when present, otherwise
        from the last segment of the URL path.

        Raises:
            InvalidUrl: ``"url not reachable"`` for an empty or non-2xx
                response; ``"valid file/filename not found"`` when no name
                containing a dot can be determined.
        """
        terminator = HEADER_END.encode()
        received = b''
        # Search the accumulated bytes, not just the latest chunk: the
        # terminator may be split across two recv() calls.
        while terminator not in received:
            data = sock.recv(CHUNK_SIZE)
            if not data:
                break
            received += data
        header_data, _, body_data = received.partition(terminator)
        if not header_data:
            # The peer closed the connection without sending a response.
            raise InvalidUrl("url not reachable")
        response_header = ResponseHeaderParser(header_data.decode(errors="ignore"))

        if response_header.response_code[0] != '2':
            raise InvalidUrl("url not reachable")

        filename = self._safe_filename(
            response_header.get_filename_from_response() or self.url.get_filename_from_url())
        if not filename or '.' not in filename:
            raise InvalidUrl("valid file/filename not found")
        self.download_file = filename
        self.download_filename = filename
        # NOTE: the body is written exactly as received; a response that uses
        # "Transfer-Encoding: chunked" is not decoded here.
        with open(self.download_file, 'wb') as download_file:
            download_file.write(body_data)
            # recv() returns b'' once the server closes the connection.
            for data in iter(lambda: sock.recv(CHUNK_SIZE), b''):
                download_file.write(data)