Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

python3 support and improved tests #95

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
*~
TEST_py*
fat-test*/
101 changes: 67 additions & 34 deletions git-fat
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python
# -*- mode:python -*-

from __future__ import print_function, with_statement
from __future__ import print_function, with_statement,unicode_literals

import sys
import hashlib
Expand All @@ -15,10 +14,27 @@ import threading
import time
import collections

if not type(sys.version_info) is tuple and sys.version_info.major > 2:
sys.stderr.write('git-fat does not support Python-3 yet. Please use python2.\n')
sys.exit(1)

# Python 2/3 compatibility: on py3 there is no `unicode` type (str is
# already text); on py2, use io.open so files opened with an encoding
# behave like py3's open.
if sys.version_info[0] > 2:
    unicode = str
else:
    from io import open

def touni(s, encoding='utf8'):
    """Coerce *s* to text, decoding byte strings with *encoding*.

    Text input (str/unicode) is returned unchanged; any object with a
    ``decode`` method (bytes, py2 str) is decoded.

    Raises:
        ValueError: if *s* is neither text nor decodable.
    """
    if isinstance(s, (str, unicode)):
        return s
    if hasattr(s, 'decode'):
        return s.decode(encoding)
    # Fixed typo in the error message ('Cound' -> 'Could').
    raise ValueError('Could not decode')

def tobytes(s, encoding='utf8'):
    """Coerce *s* to bytes, encoding text with *encoding*.

    Byte strings pass through untouched; any object exposing an
    ``encode`` method is encoded.

    Raises:
        ValueError: if *s* is neither bytes nor encodable.
    """
    if isinstance(s, bytes):
        return s
    encoder = getattr(s, 'encode', None)
    if encoder is not None:
        return encoder(encoding)
    raise ValueError('Could not encode')

try:
from subprocess import check_output
del check_output
Expand Down Expand Up @@ -90,6 +106,7 @@ def difftreez_reader(input):
newread = input.read(BLOCK_SIZE)
if not newread:
break
newread = touni(newread)
partial += newread
while True:
head, sep, partial = partial.partition('\0')
Expand All @@ -114,7 +131,7 @@ def gitconfig_get(name, file=None):
elif p.returncode:
return gitconfig_get(name)
else:
return output
return touni(output)
def gitconfig_set(name, value, file=None):
args = ['git', 'config']
if file is not None:
Expand All @@ -128,16 +145,18 @@ class GitFat(object):
self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore
try:
self.gitroot = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip()
self.gitroot = touni(self.gitroot)
except subprocess.CalledProcessError:
sys.exit(1)
self.gitdir = subprocess.check_output('git rev-parse --git-dir'.split()).strip()
self.gitdir = touni(self.gitdir)
self.objdir = os.path.join(self.gitdir, 'fat', 'objects')
if os.environ.get('GIT_FAT_VERSION') == '1':
self.encode = self.encode_v1
else:
self.encode = self.encode_v2
def magiclen(enc):
return len(enc(hashlib.sha1('dummy').hexdigest(), 5))
return len(enc(hashlib.sha1(b'dummy').hexdigest(), 5))
self.magiclen = magiclen(self.encode) # Current version
self.magiclens = [magiclen(enc) for enc in [self.encode_v1, self.encode_v2]] # All prior versions
def setup(self):
Expand All @@ -164,7 +183,6 @@ class GitFat(object):
self.verbose('Pushing to %s' % (remote))
else:
self.verbose('Pulling from %s' % (remote))

cmd = ['rsync', '--progress', '--ignore-existing', '--from0', '--files-from=-']
rshopts = ''
if ssh_user:
Expand All @@ -181,7 +199,7 @@ class GitFat(object):
cmd += [remote + '/', self.objdir + '/']
return cmd
def revparse(self, revname):
return subprocess.check_output(['git', 'rev-parse', revname]).strip()
return touni(subprocess.check_output(['git', 'rev-parse', revname]).strip())
def encode_v1(self, digest, bytes):
'Produce legacy representation of file to be stored in repository.'
return '#$# git-fat %s\n' % (digest,)
Expand All @@ -190,6 +208,7 @@ class GitFat(object):
return '#$# git-fat %s %20d\n' % (digest, bytes)
def decode(self, string, noraise=False):
cookie = '#$# git-fat '
string = touni(string)
if string.startswith(cookie):
parts = string[len(cookie):].split()
digest = parts[0]
Expand Down Expand Up @@ -217,7 +236,7 @@ class GitFat(object):
return False, None
# read file
try:
digest, bytes = self.decode_stream(open(fname))
digest, bytes = self.decode_stream(open(fname,'rb'))
except IOError:
return False, None
if isinstance(digest, str):
Expand All @@ -240,7 +259,7 @@ class GitFat(object):
try:
ishanging = False
cached = False # changes to True when file is cached
with os.fdopen(fd, 'w') as cache:
with os.fdopen(fd, 'wb') as cache:
outstream = cache
firstblock = True
for block in readblocks(instream):
Expand All @@ -265,7 +284,7 @@ class GitFat(object):
os.rename(tmpname, objfile)
self.verbose('git-fat filter-clean: caching to %s' % objfile)
cached = True
outstreamclean.write(self.encode(digest, bytes))
outstreamclean.write(tobytes(self.encode(digest, bytes)))
finally:
if not cached:
os.remove(tmpname)
Expand All @@ -276,22 +295,30 @@ class GitFat(object):
version of the file on stdin and produces the "clean" (repository) version on stdout.
'''
self.setup()
self.filter_clean(sys.stdin, sys.stdout)
if hasattr(sys.stdin,'buffer'):
stdin,stdout = sys.stdin.buffer,sys.stdout.buffer
else:
stdin,stdout = sys.stdin,sys.stdout
self.filter_clean(stdin, stdout)

def cmd_filter_smudge(self):
self.setup()
result, bytes = self.decode_stream(sys.stdin)
if hasattr(sys.stdin,'buffer'):
stdin,stdout = sys.stdin.buffer,sys.stdout.buffer
else:
stdin,stdout = sys.stdin,sys.stdout
result, bytes = self.decode_stream(stdin)
if isinstance(result, str): # We got a digest
objfile = os.path.join(self.objdir, result)
try:
cat(open(objfile), sys.stdout)
cat(open(objfile,'rb'), stdout)
self.verbose('git-fat filter-smudge: restoring from %s' % objfile)
except IOError: # file not found
self.verbose('git-fat filter-smudge: fat object missing %s' % objfile)
sys.stdout.write(self.encode(result, bytes)) # could leave a better notice about how to recover this file
stdout.write(tobytes(self.encode(result, bytes))) # could leave a better notice about how to recover this file
else: # We have an iterable over the original input.
self.verbose('git-fat filter-smudge: not a managed file')
cat_iter(result, sys.stdout)
cat_iter(result, stdout)
def catalog_objects(self):
return set(os.listdir(self.objdir))
def referenced_objects(self, rev=None, all=False):
Expand All @@ -304,15 +331,18 @@ class GitFat(object):
p1 = subprocess.Popen(['git','rev-list','--objects',rev], stdout=subprocess.PIPE)
def cut_sha1hash(input, output):
for line in input:
output.write(line.split()[0] + '\n')
line = touni(line)
line = line.split()[0] + '\n'
output.write(tobytes(line))
output.close()
# ...`cat-file --batch-check` filters for git-fat object candidates in bulk...
p2 = subprocess.Popen(['git','cat-file','--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
def filter_gitfat_candidates(input, output):
for line in input:
line = touni(line)
objhash, objtype, size = line.split()
if objtype == 'blob' and int(size) in self.magiclens:
output.write(objhash + '\n')
output.write(tobytes(objhash + '\n'))
output.close()
# ...`cat-file --batch` provides full contents of git-fat candidates in bulk
p3 = subprocess.Popen(['git','cat-file','--batch'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
Expand All @@ -326,19 +356,19 @@ class GitFat(object):
metadata_line = p3.stdout.readline()
if not metadata_line:
break # EOF
objhash, objtype, size_str = metadata_line.split()
objhash, objtype, size_str = touni(metadata_line).split()
size, bytes_read = int(size_str), 0
# We know from filter that item is a candidate git-fat object and
# is small enough to read into memory and process
content = ''
content = b''
while bytes_read < size:
data = p3.stdout.read(size - bytes_read)
if not data:
break # EOF
content += data
bytes_read += len(data)
try:
fathash = self.decode(content)[0]
fathash = touni(self.decode(content)[0])
referenced.add(fathash)
except GitFat.DecodeError:
pass
Expand All @@ -361,7 +391,8 @@ class GitFat(object):
'generator for all orphan placeholders in the working tree'
if not patterns or patterns == ['']:
patterns = ['.']
for fname in subprocess.check_output(['git', 'ls-files', '-z'] + patterns).split('\x00')[:-1]:
for fname in subprocess.check_output(['git', 'ls-files', '-z'] + patterns).split(b'\x00')[:-1]:
fname = touni(fname)
digest = self.decode_file(fname)[0]
if digest:
yield (digest, fname)
Expand Down Expand Up @@ -398,7 +429,7 @@ class GitFat(object):
cmd = self.get_rsync_command(push=True)
self.verbose('Executing: %s' % ' '.join(cmd))
p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
p.communicate(input='\x00'.join(files))
p.communicate(input=b'\x00'.join(tobytes(file) for file in files))
if p.returncode:
sys.exit(p.returncode)
def checkout(self, show_orphans=False):
Expand Down Expand Up @@ -442,7 +473,7 @@ class GitFat(object):
cmd = self.get_rsync_command(push=False)
self.verbose('Executing: %s' % ' '.join(cmd))
p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
p.communicate(input='\x00'.join(files))
p.communicate(input=b'\x00'.join(tobytes(file) for file in files))
if p.returncode:
sys.exit(p.returncode)
self.checkout()
Expand Down Expand Up @@ -480,7 +511,7 @@ class GitFat(object):
for obj in self.catalog_objects():
fname = os.path.join(self.objdir, obj)
h = hashlib.new('sha1')
for block in readblocks(open(fname)):
for block in readblocks(open(fname,'rb')):
h.update(block)
data_hash = h.hexdigest()
if obj != data_hash:
Expand All @@ -507,7 +538,7 @@ class GitFat(object):
This truncates to one hash per line.
"""
for line in input:
output.write(line[:40] + '\n')
output.write(line[:40] + b'\n')
output.close()
revlist = subprocess.Popen(['git', 'rev-list', '--all', '--objects'], stdout=subprocess.PIPE, bufsize=-1)
objcheck = subprocess.Popen(['git', 'cat-file', '--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=-1)
Expand All @@ -516,6 +547,7 @@ class GitFat(object):
numblobs = 0; numlarge = 1
# Build dict with the sizes of all large blobs
for line in objcheck.stdout:
line = touni(line)
objhash, blob, size = line.split()
if blob != 'blob':
continue
Expand Down Expand Up @@ -555,6 +587,7 @@ class GitFat(object):
lsfiles = subprocess.Popen(['git', 'ls-files', '-s'], stdout=subprocess.PIPE)
updateindex = subprocess.Popen(['git', 'update-index', '--index-info'], stdin=subprocess.PIPE)
for line in lsfiles.stdout:
line = touni(line)
mode, sep, tail = line.partition(' ')
blobhash, sep, tail = tail.partition(' ')
stageno, sep, tail = tail.partition('\t')
Expand All @@ -576,24 +609,24 @@ class GitFat(object):
hashobject.stdin.close()
filterclean = threading.Thread(target=dofilter)
filterclean.start()
cleanedobj = hashobject.stdout.read().rstrip()
cleanedobj = touni(hashobject.stdout.read()).rstrip()
catfile.wait()
hashobject.wait()
filterclean.join()
mkdir_p(os.path.dirname(hashfile))
open(hashfile, 'w').write(cleanedobj + '\n')
updateindex.stdin.write('%s %s %s\t%s\n' % (mode, cleanedobj, stageno, filename))
updateindex.stdin.write(tobytes('%s %s %s\t%s\n' % (mode, cleanedobj, stageno, filename)))
if manage_gitattributes:
try:
mode, blobsha1, stageno, filename = subprocess.check_output(['git', 'ls-files', '-s', '.gitattributes']).split()
gitattributes_lines = subprocess.check_output(['git', 'cat-file', 'blob', blobsha1]).splitlines()
mode, blobsha1, stageno, filename = touni(subprocess.check_output(['git', 'ls-files', '-s', '.gitattributes'])).split()
gitattributes_lines = touni(subprocess.check_output(['git', 'cat-file', 'blob', blobsha1])).splitlines()
except ValueError: # Nothing to unpack, thus no file
mode, stageno = '100644', '0'
gitattributes_lines = []
gitattributes_extra = ['%s filter=fat -text' % line.split()[0] for line in filelist]
hashobject = subprocess.Popen(['git', 'hash-object', '-w', '--stdin'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
stdout, stderr = hashobject.communicate('\n'.join(gitattributes_lines + gitattributes_extra) + '\n')
updateindex.stdin.write('%s %s %s\t%s\n' % (mode, stdout.strip(), stageno, '.gitattributes'))
stdout, stderr = hashobject.communicate(b'\n'.join(tobytes(l) for l in gitattributes_lines + gitattributes_extra) + b'\n')
updateindex.stdin.write(tobytes('%s %s %s\t%s\n' % (mode, stdout.strip(), stageno, '.gitattributes')))
updateindex.stdin.close()
lsfiles.wait()
updateindex.wait()
Expand Down
Loading