Skip to content

Commit

Permalink
Move hashing into command, it is only used here
Browse files Browse the repository at this point in the history
  • Loading branch information
Viktor Dick committed Feb 26, 2024
1 parent 6c7b3b1 commit cd7916c
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 39 deletions.
38 changes: 32 additions & 6 deletions perfact/zodbsync/commands/layer_hash.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/usr/bin/env python

import os
import hashlib

from ..subcommand import SubCommand
from ..helpers import hashdir


class LayerHash(SubCommand):
Expand All @@ -19,8 +19,34 @@ def add_args(parser):
)

def run(self):
    """
    Write a checksum for each folder below <path>/__root__ that has files.

    This is used when changing the contents of a layer to recognize which
    objects are to be played back.
    For each folder that directly contains files, it creates a sha1sum over:
    - The sorted list of files, one path per line, terminated by an
      empty line. The paths are the ones reported by os.scandir, so they
      include the <path>/__root__ prefix.
    - The concatenation of the file contents, in the same sorted order.
    Folders without files of their own are skipped, but their subfolders
    are still visited. Folders are visited depth-first in sorted order.

    The output is written to <path>/.checksums, one line
    "<checksum> <folder>" per folder, where <folder> is the folder path
    with the __root__ prefix stripped ('/' for __root__ itself).
    """
    root = os.path.join(self.args.path, '__root__')
    target = os.path.join(self.args.path, '.checksums')
    # Explicit stack instead of recursion, so the depth of the tree is not
    # limited by the interpreter's recursion limit.
    pending = [root]
    with open(target, 'w') as out:
        while pending:
            folder = pending.pop()
            with os.scandir(folder) as it:
                entries = list(it)

            # Push subfolders in reverse order so that pop() hands them
            # back in ascending order, directly after their parent.
            subdirs = sorted(
                (entry.path for entry in entries if entry.is_dir()),
                reverse=True,
            )
            pending.extend(subdirs)

            files = sorted(
                entry.path for entry in entries if entry.is_file()
            )
            if not files:
                continue

            digest = hashlib.sha1()
            # First the list of names, then a separating empty line ...
            for fname in files:
                digest.update(fname.encode('utf-8') + b'\n')
            digest.update(b'\n')
            # ... then the contents, read in chunks to bound memory use.
            for fname in files:
                with open(fname, 'rb') as f:
                    while data := f.read(1024*1024):
                        digest.update(data)

            print(digest.hexdigest(), folder[len(root):] or '/', file=out)
33 changes: 0 additions & 33 deletions perfact/zodbsync/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import ast
import operator
import importlib
import hashlib
import os


class Namespace(object):
Expand Down Expand Up @@ -235,34 +233,3 @@ def increment_txnid(s):
arr[pos] += 1
break
return bytes(arr)


def hashdir(root):
    """
    Create a sorted list of hashes for each folder below root.

    This is used when changing the contents of a layer to recognize which
    objects are to be played back.
    For each folder that directly contains files, it creates a sha1sum over:
    - The sorted list of files, one path per line, terminated by an
      empty line. The paths are the ones reported by os.scandir, so they
      include the root prefix.
    - The concatenation of the file contents, in the same sorted order.
    Folders without files of their own are skipped, but their subfolders
    are still visited.

    This is a generator that yields tuples (path, checksum), where path is
    the folder path with the root prefix stripped (the empty string for
    root itself). Folders are visited depth-first in sorted order.
    """
    # Explicit stack instead of recursion, so the depth of the tree is not
    # limited by the interpreter's recursion limit.
    pending = [root]
    while pending:
        folder = pending.pop()
        with os.scandir(folder) as it:
            entries = list(it)

        files = sorted(entry.path for entry in entries if entry.is_file())
        # Push subfolders in reverse order so that pop() hands them back
        # in ascending order, directly after their parent.
        pending.extend(sorted(
            (entry.path for entry in entries if entry.is_dir()),
            reverse=True,
        ))
        if not files:
            continue

        digest = hashlib.sha1()
        # First the list of names, then a separating empty line ...
        for fname in files:
            digest.update(fname.encode('utf-8') + b'\n')
        digest.update(b'\n')
        # ... then the contents, read in chunks to bound memory use.
        for fname in files:
            with open(fname, 'rb') as f:
                while data := f.read(1024*1024):
                    digest.update(data)

        yield (folder[len(root):], digest.hexdigest())

0 comments on commit cd7916c

Please sign in to comment.