forked from kernc/myba
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmyba.sh
executable file
·536 lines (478 loc) · 17.5 KB
/
myba.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
#!/bin/sh
# myba - Secure, distributed, encrypted backups based on `sh` shell and `git` (and `openssl enc` or `gpg`)
# FIXME review
#
# Basically your beloved git, but with underlying two repos:
# - bare, local-only _plain repo_ to track changes upon local, plaintext (and binary) files, set e.g. to your $HOME,
# - _encrypted repo_ that holds the encrypted blobs.
# Only the encrypted repo is ever synced with configured remotes.
# Every commit into the plain repo creates a commit in the encrypted repo.
# Commits in the encrypted repo carry base64-encoded encrypted commit metadata of the plain repo.
# In the encrypted repo, there is a dir "manifest" with filename "{plain_repo_commit_hash}" and
# line format: `<enc_path>\t<plain_path>`.
# Encrypted paths are like "abc/def/ghi/rest-of-hash" and are _deterministic_,
# dependent upon the plain pathname and chosen password! The multi-level fs hierarchy is for near maximum efficiency of `git sparse-checkout`.
# Encrypted blobs are also encrypted deterministically, based on hash of the plain content and chosen password.
#
# This is an expected shell workflow:
#
# $ export WORK_TREE= # Defaults to $HOME
# $ myba init
# $ myba add .config/git/config .vimrc .ssh/config
# $ PASSWORD=secret myba commit -m 'my config files' # Reads pw from stdin if unset
# $ myba rm .vimrc
# $ myba commit -m 'no longer use vim'
# $ myba remote add origin "$GITHUB_REPO"
# $ myba push origin
#
# Somewhere, sometime later, we may only have access to encrypted repo:
#
# $ WORK_TREE="$HOME" myba clone "$GITHUB_REPO"
# $ myba log # See plain commit info
# $ myba diff # See changes of tracked $WORK_TREE files
# $ myba checkout $COMMIT # Hash from plain or encrypted repo
# $ myba checkout .config .ssh # Checkout dirs and everything under
# $ [ -f ~/.config/git/config ] && [ -d ~/.ssh ] # Files are restored
#
# The checkout command uses sparse-checkout to fetch and decrypt the right blobs.
# The checkout command asks before overwriting existing files in $WORK_TREE!
#
# See usage AND CODE for details.
#
# shellcheck disable=SC1003,SC2064,SC2086,SC2162,SC3045
# Abort on the first failing command (-e) and on expansion of an unset variable (-u)
set -eu
# Configuration via env vars
# WORK_TREE: root of the tracked plain files; falls back to $HOME when unset or empty
WORK_TREE="${WORK_TREE:-${HOME:-~}}"
# PLAIN_REPO: git dir of the plain repo, located inside the work tree
PLAIN_REPO="$WORK_TREE/.myba"
# ENC_REPO: the encrypted repo, nested inside the plain repo's directory
ENC_REPO="$PLAIN_REPO/_encrypted"
#PASSWORD= # Replace with your encryption password or if null, read stdin
# Print the subcommand overview to stdout, then terminate the script with status 1.
usage () {
    printf '%s\n' \
        "Usage: $0 <subcommand> [options]" \
        'Subcommands:' \
        ' init Initialize repos in $WORK_TREE (default: $HOME)' \
        ' add [OPTS] PATH... Stage files for backup/version tracking' \
        ' rm PATH... Stage-remove files from future backups/version control' \
        ' commit [OPTS] Commit staged changes of tracked files as a snapshot' \
        ' push [REMOTE] Encrypt and push files to remote repo(s) (default: all)' \
        ' pull [REMOTE] Pull encrypted commits from a promisor remote' \
        ' clone REPO_URL Clone an encrypted repo and init from it' \
        ' remote CMD [OPTS] Manage remotes of the encrypted repo' \
        ' restore [--squash] Reconstruct plain repo commits from encrypted commits' \
        ' diff [OPTS] Compare changes between plain repo revisions' \
        ' log [OPTS] Show commit log of the plain repo' \
        ' checkout PATH... Sparse-checkout and decrypt files into $WORK_TREE' \
        ' checkout COMMIT Switch files to a commit of plain or encrypted repo' \
        ' gc Garbage collect, remove synced encrypted packs' \
        ' git CMD [OPTS] Inspect/execute raw git commands inside plain repo' \
        ' git_enc CMD [OPTS] Inspect/execute raw git commands inside encrypted repo' \
        '' \
        'Env vars: WORK_TREE, PLAIN_REPO, PASSWORD USE_GPG, VERBOSE, YES_OVERWRITE' \
        'For a full list and info, see: https://github.com/kernc/myba/'
    exit 1
}
# A literal TAB character: field separator of the manifest and of `git --name-status` output
_tab="$(printf '\011')"
# Run git on the plain repo: git dir is $PLAIN_REPO, work tree is $WORK_TREE.
git_plain () {
    set -- --work-tree="$WORK_TREE" --git-dir="$PLAIN_REPO" "$@"
    git "$@"
}
# Run git inside the encrypted repo directory ($ENC_REPO).
git_enc () {
    set -- -C "$ENC_REPO" "$@"
    git "$@"
}
# Succeed when the first block (at most 8192 bytes) read from stdin contains a NUL byte.
_is_binary_stream () {
    dd bs=8192 count=1 status=none |
        LC_ALL=C tr -dc '\000' |      # keep nothing but NUL bytes
        LC_ALL=C grep -qa .           # any byte left means a NUL was present
}
# mktemp wrapper whose template is prefixed with this script's name (minus ".sh");
# extra arguments (e.g. -d) are passed on to mktemp.
_mktemp () {
    _tmpl="$(basename "$0" .sh)-XXXXXXX"
    mktemp -t "$_tmpl" "$@"
}
# Print the size in bytes of the given file(s).
_file_size () {
    # Probe which stat flavour is installed: `-f FORMAT` is tried first, `-c FORMAT` otherwise
    if ! stat -f%z / >/dev/null 2>&1; then
        stat -c%s "$@"
        return
    fi
    stat -f%z "$@"
}
# Make sure $PASSWORD is available and select the encryption backend.
# Globals read:    PASSWORD, USE_GPG, KDF_ITERS
# Globals written: PASSWORD (when prompted), _encrypt_func, _decrypt_func,
#                  _armor_flags, _kdf_iters
# Returns non-zero when no password could be read.
_ask_pw () {
    # Prompt only when PASSWORD is unset; an explicitly empty PASSWORD is kept
    if [ -z "${PASSWORD+1}" ]; then
        # Disable echo only on a real terminal, so that a piped password
        # (stdin not a tty) does not make stty fail under `set -e`
        _tty_state=
        if [ -t 0 ]; then
            _tty_state="$(stty -g)"
            stty -echo
        fi
        _read_status=0
        IFS= read -p "Enter encryption password: " -r PASSWORD || _read_status=$?
        # Restore the terminal before anything else can abort the script
        if [ "$_tty_state" ]; then stty "$_tty_state"; fi
        echo
        # A final line without trailing newline still counts as a password;
        # only fail when nothing at all was read
        if [ "$_read_status" -ne 0 ] && [ -z "${PASSWORD:-}" ]; then
            unset PASSWORD
            echo "$0: Error: could not read encryption password." >&2
            return "$_read_status"
        fi
    fi

    # Set up encryption via OpenSSL
    _encrypt_func=_enc_openssl
    _decrypt_func=_dec_openssl
    _armor_flags='-base64 -A'
    _kdf_iters="${KDF_ITERS:-321731}"

    # Set up encryption via GPG
    if [ "${USE_GPG+1}" ]; then
        _encrypt_func=_enc_gpg
        _decrypt_func=_dec_gpg
        _armor_flags='--armor'
        _kdf_iters="${KDF_ITERS:-159011733}"  # OpenSSL and GPG use different KDF algos
    fi
}
_encrypted_path () {
_hash="$(echo "$1$PASSWORD" | shasum -a512 | cut -c-128)"
_path="$(echo "$_hash" | cut -c1-3)"
_path="$_path/$(echo "$_hash" | cut -c4-6)"
_path="$_path/$(echo "$_hash" | cut -c7-9)"
_path="$_path/$(echo "$_hash" | cut -c10-)"
echo "$_path"
}
# OpenSSL backend: AES-256-CTR with PBKDF2/SHA-512 key derivation ($_kdf_iters rounds);
# the passphrase is read from file descriptor 3. Extra arguments go to `openssl enc`.
_enc_openssl () {
    set -- -aes-256-ctr -pbkdf2 -md sha512 -iter "$_kdf_iters" -salt -pass fd:3 "$@"
    openssl enc "$@"
}
# OpenSSL decryption: same parameters as _enc_openssl, with -d prepended.
_dec_openssl () {
    set -- -d "$@"
    _enc_openssl "$@"
}
# Invoke gpg with the options shared by encryption and decryption; the passphrase
# is read from file descriptor 3. Extra arguments are appended.
_gpg_common () {
    set -- \
        --compress-level 0 \
        --passphrase-fd 3 --pinentry-mode loopback --batch \
        --cipher-algo AES256 --digest-algo SHA512 \
        --s2k-cipher-algo AES256 --s2k-digest-algo SHA512 --s2k-mode 3 --s2k-count "$_kdf_iters" \
        "$@"
    gpg "$@"
}
# GPG backend, symmetric encryption.
_enc_gpg () {
    set -- --symmetric "$@"
    _gpg_common "$@"
}
# GPG backend, decryption.
_dec_gpg () {
    set -- --decrypt "$@"
    _gpg_common "$@"
}
# Encrypt stdin to stdout with the backend chosen by _ask_pw.
# $1 is the pepper appended to the password; remaining arguments go to the backend.
_encrypt () {
    _pepper="$1"
    shift
    # $_encrypt_func is deliberately unquoted, as in the rest of this file
    set -- "$_pepper" $_encrypt_func "$@"
    _with_pw_on_fd3 "$@"
}
# Decrypt stdin to stdout with the backend chosen by _ask_pw.
# $1 is the pepper appended to the password; remaining arguments go to the backend.
_decrypt () {
    _pepper="$1"
    shift
    # $_decrypt_func is deliberately unquoted, as in the rest of this file
    set -- "$_pepper" $_decrypt_func "$@"
    _with_pw_on_fd3 "$@"
}
# Encrypt the HEAD version of plain path $1 into "$ENC_REPO/$2".
# Content detected as binary is encrypted as is; anything else is gzip-compressed
# first. The plain path doubles as the encryption pepper.
_encrypt_file () {
    _plain_path="$1"
    _enc_path="$2"
    mkdir -p "$ENC_REPO/$(dirname "$_enc_path")"

    git_plain show "HEAD:$_plain_path" |
        {
            # The binary probe reads the blob from git again, so the piped
            # content stays untouched for cat/gzip
            if git_plain show "HEAD:$_plain_path" | _is_binary_stream; then
                cat
            else
                gzip -cv2
            fi
        } |
        _encrypt "$_plain_path" > "$ENC_REPO/$_enc_path"
}
# Decrypt "$ENC_REPO/$1" into "$WORK_TREE/$2", gunzipping when the decrypted data
# is a valid gzip stream. Unless $YES_OVERWRITE is set, an existing target file is
# only replaced after an interactive y/Y answer read from stdin.
_decrypt_file () {
    _enc_path="$1"
    _plain_path="$2"
    _target="$WORK_TREE/$_plain_path"

    # Ask before clobbering an existing file
    if [ -f "$_target" ] && [ -z "${YES_OVERWRITE:-}" ]; then
        echo "WARNING: File '$_target' exists. Overwrite? [y/N]"
        read _choice
        case "$_choice" in
            [Yy]*) ;;
            *)
                echo "Skipping '$_target'"
                return
                ;;
        esac
    fi

    # Decrypt into a scratch file first, so the gzip check can re-read the data
    _scratch="$(_mktemp)"
    _decrypt "$_plain_path" < "$ENC_REPO/$_enc_path" > "$_scratch"
    mkdir -p "$(dirname "$_target")"
    if gzip -t "$_scratch" >/dev/null 2>&1; then
        gzip -dcv < "$_scratch"
    else
        cat "$_scratch"
    fi > "$_target"
    rm "$_scratch"
}
# Decrypt every "$ENC_REPO/manifest/<commit>" into "$PLAIN_REPO/manifest/<commit>".
# Requires _ask_pw to have run (the _decrypt backend and password must be set).
# Returns 1 when any manifest could not be decrypted into plausible text, else 0.
_decrypt_manifests () {
    status=0
    mkdir -p "$PLAIN_REPO/manifest"
    for file in "$ENC_REPO"/manifest/*; do
        # An unmatched glob stays literal; skip it rather than writing a bogus "*" manifest
        [ -f "$file" ] || continue
        fname="$(basename "$file")"
        # With a wrong password the decrypted bytes are usually not gzip data, so test
        # the pipeline explicitly: otherwise `set -e` aborts (or the failure goes
        # unnoticed) before the warning below can be printed
        if ! _decrypt "" < "$file" | gzip -dc > "$PLAIN_REPO/manifest/$fname"; then
            echo "WARNING: Likely invalid decryption password for commit '$fname' (manifest could not be decompressed)."
            rm -f "$PLAIN_REPO/manifest/$fname"  # don't leave a truncated manifest behind
            status=1
            continue
        fi
        if _is_binary_stream < "$PLAIN_REPO/manifest/$fname"; then
            echo "WARNING: Likely invalid decryption password for commit '$fname', or your manifest file contains binary paths."
            status=1
        fi
    done
    return $status
}
# Run a command with "$PASSWORD$1" (plus a trailing newline) readable on file
# descriptor 3, so the secret never shows up in an argument list.
# Arguments: $1 - pepper appended to the password; $2... - command to run
# Returns:   the exit status of the command
_with_pw_on_fd3 () {
    _pepper="$1"
    shift
    exec 3<<EOF
$PASSWORD$_pepper
EOF
    # Remember the command's status: closing the descriptor below would
    # otherwise overwrite it with 0 and hide a failed en-/decryption
    _fd3_status=0
    "$@" || _fd3_status=$?
    exec 3<&-
    return "$_fd3_status"
}
# Create and configure the bare plain repo ($PLAIN_REPO) and the encrypted repo
# ($ENC_REPO). Safe to run again on already initialized repos.
cmd_init () {
    # Init both dirs repos
    mkdir -p "$PLAIN_REPO" "$ENC_REPO"
    git -C "$PLAIN_REPO" init -b master --bare
    git -C "$ENC_REPO" init -b master
    mkdir -p "$PLAIN_REPO/manifest" \
             "$ENC_REPO/manifest"

    # Don't pollute du
    # -f: on a repeated init the samples are already gone and an unmatched glob
    # stays literal; neither should abort the script
    rm -f "$PLAIN_REPO"/hooks/*.sample \
          "$ENC_REPO"/.git/hooks/*.sample

    # Configure
    # $USER can be unset (cron, containers), which would abort under `set -u`
    _user="${USER:-$(id -un 2>/dev/null || echo myba)}"
    email="$_user@$(hostname 2>/dev/null || cat /etc/hostname)"
    git_plain config user.name "$_user"
    git_plain config user.email "$email"
    git_plain config status.showUntrackedFiles no  # We don't care to see largely untracked $HOME  # XXX: remove this!
    git_enc config user.name "$_user"
    git_enc config user.email "$email"
    # All our files are strictly binary (encrypted)
    git_enc config core.bigFileThreshold 100
    git_enc config push.autoSetupRemote true
    git_enc config push.default upstream
    git_enc config fetch.parallel 4

    # Set up default gitignore
    # Tracing is paused around the write so the long ignore list isn't dumped by `set -x`
    xtrace_was_on=
    case $- in *x*) xtrace_was_on=true; set +x ;; esac
    printf '%s\n' "$default_gitignore" > "$PLAIN_REPO/info/exclude"
    if [ "$xtrace_was_on" ]; then set -x; fi
    echo '* -text -diff' >"$ENC_REPO/.git/info/attributes"

    # Encrypted repo is a sparse-checkout
    git_enc sparse-checkout set "manifest"
    git_enc sparse-checkout reapply
}
# Clone an encrypted repo as a sparse, blob-less partial clone into $ENC_REPO,
# initialize both repos around it and decrypt the manifests.
# Arguments: $1 - URL of the encrypted repo
cmd_clone () {
    # Validate first: otherwise `set -u` dies with a bare "unbound variable"
    # message after $ENC_REPO has already been created
    if [ $# -lt 1 ] || [ -z "$1" ]; then
        echo "$0: Error: clone requires a REPO_URL argument." >&2
        return 1
    fi
    mkdir -p "$ENC_REPO"
    git clone --filter=blob:none --sparse -v "$1" "$ENC_REPO"
    cmd_init
    _ask_pw
    _decrypt_manifests
}
# Rebuild the plain repo's history from the encrypted repo.
# Arguments: $1 - optional "--squash": restore everything as one single commit
#                 instead of replaying the encrypted commits one by one.
# Files are decrypted into a temporary directory that stands in for $WORK_TREE,
# so the user's real files are not written by this command.
cmd_restore () {
# Convert the encrypted commit messages back to plain repo commits
# Refuse to clobber a plain repo that already tracks files, unless YES_OVERWRITE is set
if [ "$(git_plain ls-files)" ]; then
if [ ! "${YES_OVERWRITE:-}" ]; then
echo "WARNING: Plain repo in '$PLAIN_REPO' already restored (and possibly commited to). To overwrite, set \$YES_OVERWRITE=1."
exit 1
fi
# Remove existing plain repo
git_plain update-ref -d HEAD
git_plain reflog expire --all --expire-unreachable=now
git_plain gc --prune=now --aggressive
fi
temp_dir="$(_mktemp -d)"
# Delete the temporary work tree on interrupt, hangup and normal exit.
# The path is expanded now, while the trap string is being built.
trap "rm -rf '$temp_dir'" INT HUP EXIT
_ask_pw
if [ "${1:-}" = "--squash" ]; then
# Squash mode: make all encrypted files available in the working copy
git_enc sparse-checkout disable
# The manifest paths listed by the encrypted repo are fed to grep as fixed-string
# patterns (-F -f-) and searched for in the decrypted manifests.
# NOTE(review): presumably meant to select the <enc_path>\t<plain_path> lines
# of all known commits — confirm what these patterns are expected to match.
git_enc ls-files "manifest/" |
grep -RFhf- "$PLAIN_REPO/manifest" | sort -u |
while IFS="$_tab" read -r _enc_path _plain_path; do
# WORK_TREE is overridden for these calls only, redirecting output to temp_dir
WORK_TREE="$temp_dir" _decrypt_file "$_enc_path" "$_plain_path"
WORK_TREE="$temp_dir" git_plain add "$_plain_path"
done
# Only commit when something was actually staged
if ! WORK_TREE="$temp_dir" git_plain diff --staged --quiet; then
# NOTE(review): '$1' is the literal "--squash" in this branch
WORK_TREE="$temp_dir" git_plain commit -m "Restore '$1' at $(date '+%Y-%m-%d %H:%M:%S%z')"
fi
else
# Replay mode: walk the encrypted commits oldest first
git_enc log --reverse --pretty='%H' |
while IFS= read -r _enc_commit; do
# Limit the sparse checkout to the paths touched by this encrypted commit
git_enc show --name-only --pretty=format: "$_enc_commit" |
git_enc sparse-checkout set --stdin
git_enc sparse-checkout reapply
# Decrypt and stage files from this commit into temp_dir
# The manifest file name (second path component) is the plain commit's hash
plain_commit="$(git_enc show --name-only --pretty=format: "$_enc_commit" -- "manifest/" | cut -d/ -f2)"
while IFS="$_tab" read -r _enc_path _plain_path; do
WORK_TREE="$temp_dir" _decrypt_file "$_enc_path" "$_plain_path"
WORK_TREE="$temp_dir" git_plain add "$_plain_path"
done < "$PLAIN_REPO/manifest/$plain_commit"
# Commit the changes to the plain repo
# The encrypted commit's message body holds the armored, encrypted, gzipped plain message
_msg="$(git_enc show -s --format='%B' "$_enc_commit" | _decrypt "" $_armor_flags | gzip -dc)"
# Author date and author identity are taken over from the encrypted commit
_date="$(git_enc show -s --format='%ai' "$_enc_commit")"
_author="$(git_enc show -s --format='%an <%ae>' "$_enc_commit")"
if ! WORK_TREE="$temp_dir" git_plain diff --staged --quiet; then
WORK_TREE="$temp_dir" git_plain commit --no-gpg-sign -m "$_msg" --date "$_date" --author "$_author"
fi
done
fi
# Shrink the encrypted repo back to manifests only and drop its packs
cmd_gc
}
# Commit staged changes to the plain repo, then mirror that commit into the
# encrypted repo: encrypted blobs, an encrypted manifest and an encrypted
# commit message. Arguments are passed on to `git commit` of the plain repo.
cmd_commit () {
    # Commit to plain repo
    git_plain commit --message "myba backup $(date '+%Y-%m-%d %H:%M:%S')" --verbose "$@"

    # Encrypt and stage encrypted files
    _ask_pw
    manifest_path="manifest/$(git_plain rev-parse HEAD)"
    # Create the manifest up front: a commit made of deletions only appends
    # nothing to it, yet it is still compressed and encrypted below
    mkdir -p "$PLAIN_REPO/manifest" "$ENC_REPO/manifest"
    : >> "$PLAIN_REPO/$manifest_path"
    # --no-renames: a rename is reported as delete + add, not as a three-field
    # "R<score> old new" line that the two-field read below would misparse
    git_plain show --no-renames --name-status --pretty=format: HEAD |
        while IFS="$_tab" read -r _status _path; do
            [ -n "$_path" ] || continue  # tolerate blank separator lines
            _enc_path="$(_encrypted_path "$_path")"

            # A deleted file has no content at HEAD: only drop its encrypted
            # counterpart, don't try to encrypt it first
            if [ "$_status" = "D" ]; then
                git_enc lfs untrack "$_enc_path" || true  # Ok if Git LFS is not used
                git_enc rm -f --sparse "$_enc_path"
                continue
            fi

            _encrypt_file "$_path" "$_enc_path"
            # If file larger than 40 MB, configure Git LFS
            if [ "$(_file_size "$ENC_REPO/$_enc_path")" -gt $((40 * 1024 * 1024)) ]; then
                git_enc lfs track --filename "$_enc_path"
            fi
            git_enc add -v --sparse "$_enc_path"
            echo "$_enc_path$_tab$_path" >> "$PLAIN_REPO/$manifest_path"
        done

    # If first commit, add self
    # FIXME: fixme??
    if ! git_enc rev-parse HEAD 2>/dev/null; then
        _self="$(command -v "$0" 2>/dev/null || echo "$0")"
        cp "$_self" "$ENC_REPO/$(basename "$_self")"
        git_enc add --sparse "$(basename "$_self")"
    fi

    # Stage new manifest
    gzip -c2 "$PLAIN_REPO/$manifest_path" | _encrypt "" > "$ENC_REPO/$manifest_path"
    git_enc add --sparse "$manifest_path"

    # Commit to encrypted repo
    # The message is the gzipped, encrypted and armored plain commit message plus file list
    git_enc status --short
    git_enc commit -m "$(git_plain show --format='%B' --name-status | gzip -c9 | _encrypt "" $_armor_flags)"
}
# Check out either a commit or a set of paths.
# - $1 names a commit of the plain repo: check it out there.
# - $1 names a commit of the encrypted repo: check it out there and re-decrypt
#   the manifests.
# - otherwise all arguments are tracked paths (files or directories): fetch their
#   encrypted blobs via sparse-checkout and decrypt them into $WORK_TREE.
cmd_checkout() {
    if [ $# -eq 0 ]; then
        echo 'Nothing to checkout'
        exit 1
    fi

    # If a commit hash is provided, checkout that commit in either repo
    if git_plain rev-parse --verify "$1^{commit}" >/dev/null 2>&1; then
        git_plain checkout "$1"
    elif git_enc rev-parse --verify "$1^{commit}" >/dev/null 2>&1; then
        git_enc sparse-checkout set "manifest"
        git_enc sparse-checkout reapply
        git_enc checkout "$1"
        _ask_pw
        _decrypt_manifests
    else
        # Otherwise, assume the arguments are paths to files/directories
        working_manifest="$PLAIN_REPO/working_manifest"
        # Each argument is used as an extended regex anchored after the TAB,
        # matching the path itself or anything below it
        for pattern in "$@"; do
            # grep exits 1 when nothing matches; that must not end the loop under
            # `set -e` and hide the remaining patterns. Real errors (>1) still fail.
            grep -REIh "$_tab$pattern"'($|/)' "$PLAIN_REPO/manifest" || [ $? -eq 1 ]
        done | sort -u > "$working_manifest"

        cut -f1 "$working_manifest" |
            git_enc sparse-checkout set --stdin
        git_enc sparse-checkout add "manifest"
        git_enc sparse-checkout reapply

        _ask_pw
        cat "$working_manifest"
        # Read the manifest on fd 4 so stdin stays connected to the user:
        # _decrypt_file reads its overwrite confirmation from stdin and would
        # otherwise swallow the next manifest line as the answer
        while IFS="$_tab" read -r _enc_path _plain_path <&4; do
            if [ ! -f "$ENC_REPO/$_enc_path" ]; then
                echo "Note, file '$_plain_path' has been since removed."
                continue
            fi
            _decrypt_file "$_enc_path" "$_plain_path"
        done 4< "$working_manifest"
        rm "$working_manifest"
    fi
}
# Unstage the given tracked paths from the plain repo's index; the files in
# $WORK_TREE are left alone. Returns 1 when any path is not tracked.
cmd_rm() {
    _ask_pw
    _is_error=
    for _path in "$@"; do
        if git_plain ls-files --error-unmatch "$_path" >/dev/null 2>&1; then
            # Leave worktree copy alone
            # NOTE: The rest (encrypted repo) is handled in cmd_commit
            git_plain rm --cached "$_path"
        else
            echo "$0: Error: '$_path' is not being tracked."
            _is_error=1
        fi
    done
    # Deliberately unquoted: an empty flag turns this into a plain `return`
    return $_is_error
}
# Manage remotes of the encrypted repo; arguments are passed to `git remote`.
# After `remote add NAME ...` the new remote is marked as a promisor remote with
# a blob:none partial-clone filter.
# NOTE: NAME is taken from $2, so options placed between `add` and NAME are not supported.
cmd_remote () {
    git_enc remote "$@"
    # ${1:-}/${2:-}: a bare `myba remote` must list remotes, not die on `set -u`
    if [ "${1:-}" = 'add' ] && [ -n "${2:-}" ]; then
        # Ideally, this would reside in cmd_init, but then
        # `git remote add` complains 'error: remote origin already exists'
        git_enc config "remote.$2.promisor" true
        git_enc config "remote.$2.partialclonefilter" "blob:none"
    fi
}
# Push all branches of the encrypted repo: to every configured remote when called
# without arguments, otherwise with the given `git push` arguments. Afterwards
# refetch from all remotes and garbage-collect the local encrypted repo.
cmd_push () {
    case $# in
        0)
            # With no args, push to all remotes
            git_enc remote show -n |
                while read _origin; do
                    git_enc push --verbose --all "$_origin"
                done
            ;;
        *)
            git_enc push --verbose --all "$@"
            ;;
    esac
    git_enc fetch --refetch --all --verbose --no-write-fetch-head

    # Remove redundant files including just-pushed packs
    sleep .2  # Fix "fatal: gc is already running on machine"
    cmd_gc
}
# Reduce disk usage of the encrypted repo: shrink the sparse checkout back to
# "manifest" and delete every local pack, leaving a .promisor marker in its place.
# NOTE(review): every local pack is removed, not only ones known to be pushed —
# confirm callers only run this once the objects exist on a remote.
cmd_gc () {
    # Reduce disk usage by removing encrypted repo's blobs
    git_enc sparse-checkout set "manifest"
    git_enc sparse-checkout reapply
    # Outright rm packs for which promisor nodes exist
    for file in "$ENC_REPO/.git/objects/pack"/pack-*.pack; do
        # Without any pack the glob stays literal; skip it instead of
        # creating a file literally named "pack-*.promisor"
        [ -e "$file" ] || continue
        touch "${file%.pack}.promisor"
        rm -f "${file%.pack}.pack" \
              "${file%.pack}.idx"
    done
}
# Simple passthrough commands
# Stage paths in the plain repo; arguments are handed to `git add` unchanged.
cmd_add () {
    set -- add "$@"
    git_plain "$@"
}
# Show differences in the plain repo; arguments are handed to `git diff` unchanged.
cmd_diff () {
    set -- diff "$@"
    git_plain "$@"
}
# Pull into the encrypted repo (arguments go to `git pull`), then refresh the
# decrypted manifests.
cmd_pull () {
    git_enc pull "$@"
    # The password and the decryption backend are only set up by _ask_pw;
    # without it _decrypt_manifests trips over unset variables under `set -u`
    _ask_pw
    _decrypt_manifests
}
# Show the plain repo's history, one colored line per commit (abbreviated hash,
# short commit date, subject) followed by the changed files. Extra arguments are
# appended to `git log`.
cmd_log () {
    set -- \
        --pretty="%C(yellow)%h%C(red) %cd%C(cyan) %s%C(reset)" \
        --date=short --name-status "$@"
    git_plain log "$@"
}
# Run a command between a start and a "done ok" banner; with $VERBOSE set to a
# non-empty value the command is additionally traced via `set -x`.
verbose () {
    echo "$0: $*"
    if [ -z "${VERBOSE:-}" ]; then
        "$@"
    else
        set -x
        "$@"
        set +x
    fi
    echo "$0: $1 done ok"
}
# Default ignore patterns; cmd_init writes this text to "$PLAIN_REPO/info/exclude".
# The '#' lines inside the quotes are part of the string (comments of the exclude
# file), not shell comments.
default_gitignore='
# Compiled source
build/
_build/
*.py[cod]
*.[oa]
*.la
*.obj
*.[kms]o
*.so.*
*.dylib
*.elf
*.lib
*.dll
*.class
*.out
# Other VCS
.git/
# Ignore Python
.venv/
venv/
python*/site-packages/
*.py[cod]
__pycache__/
.eggs/
*.egg/
*.egg-info
dist/*.tar.gz
dist/*.zip
dist/*.whl
# Ignore JS
node_modules/
jspm_packages/
.npm/
.eslintcache
.yarn/
.grunt/
# Docs
htmlcov/
.tox/
.coverage/
.coverage.*/
coverage.xml
.hypothesis/
.mypy_cache/
# IDEs and editors
.idea/*
.vscode/*
# Temporary & logs
*.cache
*.tmp
tmp/
*.bak
*.old
*.pid
*.lock
*~
logs
*.log
# OS
.DS_Store
Thumbs.db
'
# Main: dispatch the first argument as a subcommand, passing the rest along.
# Without any argument show the usage text: under `set -u`, reading "$1"
# would otherwise abort with a bare "unbound variable" error.
if [ $# -eq 0 ]; then
    usage
fi
cmd="$1"
shift

case "$cmd" in
    init) verbose cmd_init ;;
    add) verbose cmd_add "$@" ;;
    rm) verbose cmd_rm "$@" ;;
    commit) verbose cmd_commit "$@" ;;
    remote) verbose cmd_remote "$@" ;;
    push) verbose cmd_push "$@" ;;
    pull) verbose cmd_pull "$@" ;;
    clone) verbose cmd_clone "$@" ;;
    restore) verbose cmd_restore "$@" ;;
    diff) verbose cmd_diff "$@" ;;
    log) verbose cmd_log "$@" ;;
    checkout) verbose cmd_checkout "$@" ;;
    gc) verbose cmd_gc "$@" ;;
    git) verbose git_plain "$@" ;;
    git_enc) verbose git_enc "$@" ;;
    # Anything else, including help requests, prints usage and exits 1
    *) usage ;;
esac