Skip to content

Commit

Permalink
Reformat codebase with ruff
Browse files: browse the repository at this point in the history
  • Loading branch information
kynan committed Jan 18, 2025
1 parent c97453b commit 9e4873d
Show file tree
Hide file tree
Showing 9 changed files with 368 additions and 243 deletions.
4 changes: 2 additions & 2 deletions nbstripout/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._nbstripout import install, uninstall, status, main, __doc__ as docstring
from ._utils import pop_recursive, strip_output, MetadataError
__all__ = ["install", "uninstall", "status", "main",
"pop_recursive", "strip_output", "MetadataError"]

__all__ = ['install', 'uninstall', 'status', 'main', 'pop_recursive', 'strip_output', 'MetadataError']
__doc__ = docstring
205 changes: 131 additions & 74 deletions nbstripout/_nbstripout.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@

from nbstripout._utils import strip_output, strip_zeppelin_output

__all__ = ["install", "uninstall", "status", "main"]
__all__ = ['install', 'uninstall', 'status', 'main']
__version__ = '0.8.1'


Expand All @@ -136,13 +136,17 @@

def _get_system_gitconfig_folder():
try:
git_config_output = check_output(['git', 'config', '--system', '--list', '--show-origin'], universal_newlines=True, stderr=STDOUT).strip()
git_config_output = check_output(
['git', 'config', '--system', '--list', '--show-origin'], universal_newlines=True, stderr=STDOUT
).strip()

# If the output is empty, it means the file exists but is empty, so we cannot get the path.
# To still get it, we're setting a temporary config parameter.
if git_config_output == '':
check_call(['git', 'config', '--system', 'filter.nbstripoutput.test', 'test'])
git_config_output = check_output(['git', 'config', '--system', '--list', '--show-origin'], universal_newlines=True).strip()
git_config_output = check_output(
['git', 'config', '--system', '--list', '--show-origin'], universal_newlines=True
).strip()
check_call(['git', 'config', '--system', '--unset', 'filter.nbstripoutput.test'])

output_lines = git_config_output.split('\n')
Expand Down Expand Up @@ -192,7 +196,7 @@ def _parse_size(num_str):
elif num_str[-1] == 'G':
return int(num_str[:-1]) * (10**9)
else:
raise ValueError(f"Unknown size identifier {num_str[-1]}")
raise ValueError(f'Unknown size identifier {num_str[-1]}')


def install(git_config, install_location=INSTALL_LOCATION_LOCAL, python=None, attrfile=None):
Expand Down Expand Up @@ -280,7 +284,9 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False):
elif install_location == INSTALL_LOCATION_GLOBAL:
location = 'globally'
else:
git_dir = path.dirname(path.abspath(check_output(['git', 'rev-parse', '--git-dir'], universal_newlines=True).strip()))
git_dir = path.dirname(
path.abspath(check_output(['git', 'rev-parse', '--git-dir'], universal_newlines=True).strip())
)
location = f"in repository '{git_dir}'"

clean = check_output(git_config + ['filter.nbstripout.clean'], universal_newlines=True).strip()
Expand All @@ -299,7 +305,9 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False):
diff_attributes = ''.join(line for line in attrs if 'diff' in line).strip()
else:
attributes = check_output(['git', 'check-attr', 'filter', '--', '*.ipynb'], universal_newlines=True).strip()
diff_attributes = check_output(['git', 'check-attr', 'diff', '--', '*.ipynb'], universal_newlines=True).strip()
diff_attributes = check_output(
['git', 'check-attr', 'diff', '--', '*.ipynb'], universal_newlines=True
).strip()

try:
extra_keys = check_output(git_config + ['filter.nbstripout.extrakeys'], universal_newlines=True).strip()
Expand Down Expand Up @@ -333,16 +341,24 @@ def status(git_config, install_location=INSTALL_LOCATION_LOCAL, verbose=False):

return 1


def process_jupyter_notebook(input_stream, output_stream, args, extra_keys, filename='input from stdin'):
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
warnings.simplefilter('ignore', category=UserWarning)
nb = nbformat.read(input_stream, as_version=nbformat.NO_CONVERT)

nb_orig = copy.deepcopy(nb)
nb_stripped = strip_output(nb, args.keep_output, args.keep_count,
args.keep_id, extra_keys, args.drop_empty_cells,
args.drop_tagged_cells.split(),
args.strip_init_cells, _parse_size(args.max_size))
nb_stripped = strip_output(
nb,
args.keep_output,
args.keep_count,
args.keep_id,
extra_keys,
args.drop_empty_cells,
args.drop_tagged_cells.split(),
args.strip_init_cells,
_parse_size(args.max_size),
)

any_change = nb_orig != nb_stripped

Expand All @@ -355,11 +371,12 @@ def process_jupyter_notebook(input_stream, output_stream, args, extra_keys, file
output_stream.seek(0)
output_stream.truncate()
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=UserWarning)
warnings.simplefilter('ignore', category=UserWarning)
nbformat.write(nb_stripped, output_stream)
output_stream.flush()
return any_change


def process_zeppelin_notebook(input_stream, output_stream, args, extra_keys, filename='input from stdin'):
nb = json.load(input_stream, object_pairs_hook=collections.OrderedDict)
nb_orig = copy.deepcopy(nb)
Expand All @@ -380,66 +397,94 @@ def process_zeppelin_notebook(input_stream, output_stream, args, extra_keys, fil
output_stream.flush()
return any_change


def main():
parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter)
task = parser.add_mutually_exclusive_group()
task.add_argument('--dry-run', action='store_true',
help='Print which notebooks would have been stripped')
task.add_argument('--install', action='store_true',
help='Install nbstripout in the current repository (set '
'up the git filter and attributes)')
task.add_argument('--uninstall', action='store_true',
help='Uninstall nbstripout from the current repository '
'(remove the git filter and attributes)')
task.add_argument('--is-installed', action='store_true',
help='Check if nbstripout is installed in current repository')
task.add_argument('--status', action='store_true',
help='Print status of nbstripout installation in current '
'repository and configuration summary if installed')
task.add_argument('--version', action='store_true',
help='Print version')
parser.add_argument("--verify", action="store_true",
help="Return a non-zero exit code if any files were changed, Implies --dry-run")
parser.add_argument('--keep-count', action='store_true',
help='Do not strip the execution count/prompt number')
parser.add_argument('--keep-output', action='store_true',
help='Do not strip output', default=None)
parser.add_argument('--keep-id', action='store_true',
help='Keep the randomly generated cell ids, '
'which will be different after each execution.')
parser.add_argument('--extra-keys', default='',
help='Space separated list of extra keys to strip '
'from metadata, e.g. metadata.foo cell.metadata.bar')
parser.add_argument('--keep-metadata-keys', default='',
help='Space separated list of metadata keys to keep'
', e.g. metadata.foo cell.metadata.bar')
parser.add_argument('--drop-empty-cells', action='store_true',
help='Remove cells where `source` is empty or contains only whitepace')
parser.add_argument('--drop-tagged-cells', default='',
help='Space separated list of cell-tags that remove an entire cell')
parser.add_argument('--strip-init-cells', action='store_true',
help='Remove cells with `init_cell: true` metadata (default: False)')
parser.add_argument('--attributes', metavar='FILEPATH',
help='Attributes file to add the filter to (in '
'combination with --install/--uninstall), '
'defaults to .git/info/attributes')
task.add_argument('--dry-run', action='store_true', help='Print which notebooks would have been stripped')
task.add_argument(
'--install',
action='store_true',
help='Install nbstripout in the current repository (set up the git filter and attributes)',
)
task.add_argument(
'--uninstall',
action='store_true',
help='Uninstall nbstripout from the current repository (remove the git filter and attributes)',
)
task.add_argument(
'--is-installed', action='store_true', help='Check if nbstripout is installed in current repository'
)
task.add_argument(
'--status',
action='store_true',
help='Print status of nbstripout installation in current repository and configuration summary if installed',
)
task.add_argument('--version', action='store_true', help='Print version')
parser.add_argument(
'--verify', action='store_true', help='Return a non-zero exit code if any files were changed, Implies --dry-run'
)
parser.add_argument('--keep-count', action='store_true', help='Do not strip the execution count/prompt number')
parser.add_argument('--keep-output', action='store_true', help='Do not strip output', default=None)
parser.add_argument(
'--keep-id',
action='store_true',
help='Keep the randomly generated cell ids, which will be different after each execution.',
)
parser.add_argument(
'--extra-keys',
default='',
help='Space separated list of extra keys to strip from metadata, e.g. metadata.foo cell.metadata.bar',
)
parser.add_argument(
'--keep-metadata-keys',
default='',
help='Space separated list of metadata keys to keep, e.g. metadata.foo cell.metadata.bar',
)
parser.add_argument(
'--drop-empty-cells',
action='store_true',
help='Remove cells where `source` is empty or contains only whitepace',
)
parser.add_argument(
'--drop-tagged-cells', default='', help='Space separated list of cell-tags that remove an entire cell'
)
parser.add_argument(
'--strip-init-cells', action='store_true', help='Remove cells with `init_cell: true` metadata (default: False)'
)
parser.add_argument(
'--attributes',
metavar='FILEPATH',
help='Attributes file to add the filter to (in '
'combination with --install/--uninstall), '
'defaults to .git/info/attributes',
)
location = parser.add_mutually_exclusive_group()
location.add_argument('--global', dest='_global', action='store_true',
help='Use global git config (default is local config)')
location.add_argument('--system', dest='_system', action='store_true',
help='Use system git config (default is local config)')
location.add_argument('--python', dest='_python', metavar="PATH",
help='Path to python executable to use when --install\'ing '
'(default is deduced from `sys.executable`)')
parser.add_argument('--force', '-f', action='store_true',
help='Strip output also from files with non ipynb extension')
parser.add_argument('--max-size', metavar='SIZE',
help='Keep outputs smaller than SIZE', default='0')
parser.add_argument('--mode', '-m', default='jupyter', choices=['jupyter', 'zeppelin'],
help='Specify mode between [jupyter (default) | zeppelin] (to be used in combination with -f)')

parser.add_argument('--textconv', '-t', action='store_true',
help='Prints stripped files to STDOUT')
location.add_argument(
'--global', dest='_global', action='store_true', help='Use global git config (default is local config)'
)
location.add_argument(
'--system', dest='_system', action='store_true', help='Use system git config (default is local config)'
)
location.add_argument(
'--python',
dest='_python',
metavar='PATH',
help="Path to python executable to use when --install'ing (default is deduced from `sys.executable`)",
)
parser.add_argument(
'--force', '-f', action='store_true', help='Strip output also from files with non ipynb extension'
)
parser.add_argument('--max-size', metavar='SIZE', help='Keep outputs smaller than SIZE', default='0')
parser.add_argument(
'--mode',
'-m',
default='jupyter',
choices=['jupyter', 'zeppelin'],
help='Specify mode between [jupyter (default) | zeppelin] (to be used in combination with -f)',
)

parser.add_argument('--textconv', '-t', action='store_true', help='Prints stripped files to STDOUT')

parser.add_argument('files', nargs='*', help='Files to strip output from')
args = parser.parse_args()
Expand Down Expand Up @@ -482,17 +527,29 @@ def main():
]

try:
extra_keys.extend(check_output((git_config if args._system or args._global else ['git', 'config']) + ['filter.nbstripout.extrakeys'], universal_newlines=True).strip().split())
extra_keys.extend(
check_output(
(git_config if args._system or args._global else ['git', 'config']) + ['filter.nbstripout.extrakeys'],
universal_newlines=True,
)
.strip()
.split()
)
except (CalledProcessError, FileNotFoundError):
pass

extra_keys.extend(args.extra_keys.split())

try:
keep_metadata_keys = check_output(
(git_config if args._system or args._global else ['git', 'config']) + ['filter.nbstripout.keepmetadatakeys'],
universal_newlines=True
).strip().split()
keep_metadata_keys = (
check_output(
(git_config if args._system or args._global else ['git', 'config'])
+ ['filter.nbstripout.keepmetadatakeys'],
universal_newlines=True,
)
.strip()
.split()
)
except (CalledProcessError, FileNotFoundError):
keep_metadata_keys = []
keep_metadata_keys.extend(args.keep_metadata_keys.split())
Expand Down Expand Up @@ -533,6 +590,6 @@ def main():
except nbformat.reader.NotJSONError:
print('No valid notebook detected on stdin', file=sys.stderr)
raise SystemExit(1)

if args.verify and any_change:
raise SystemExit(1)
27 changes: 16 additions & 11 deletions nbstripout/_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from collections import defaultdict
import sys

__all__ = ["pop_recursive", "strip_output", "strip_zeppelin_output", "MetadataError"]
__all__ = ['pop_recursive', 'strip_output', 'strip_zeppelin_output', 'MetadataError']


class MetadataError(Exception):
Expand Down Expand Up @@ -45,7 +45,7 @@ def _cells(nb, conditionals):


def get_size(item):
""" Recursively sums length of all strings in `item` """
"""Recursively sums length of all strings in `item`"""
if isinstance(item, str):
return len(item)
elif isinstance(item, list):
Expand Down Expand Up @@ -73,9 +73,7 @@ def determine_keep_output(cell, default, strip_init_cells=False):

# keep_output between metadata and tags should not contradict each other
if has_keep_output_metadata and has_keep_output_tag and not keep_output_metadata:
raise MetadataError(
'cell metadata contradicts tags: `keep_output` is false, but `keep_output` in tags'
)
raise MetadataError('cell metadata contradicts tags: `keep_output` is false, but `keep_output` in tags')

if has_keep_output_metadata or has_keep_output_tag:
return keep_output_metadata or has_keep_output_tag
Expand All @@ -94,8 +92,17 @@ def strip_zeppelin_output(nb):
return nb


def strip_output(nb, keep_output, keep_count, keep_id, extra_keys=[], drop_empty_cells=False, drop_tagged_cells=[],
strip_init_cells=False, max_size=0):
def strip_output(
nb,
keep_output,
keep_count,
keep_id,
extra_keys=[],
drop_empty_cells=False,
drop_tagged_cells=[],
strip_init_cells=False,
max_size=0,
):
"""
Strip the outputs, execution count/prompt number and miscellaneous
metadata from a notebook object, unless specified to keep either the outputs
Expand All @@ -122,18 +129,16 @@ def strip_output(nb, keep_output, keep_count, keep_id, extra_keys=[], drop_empty
if drop_empty_cells:
conditionals.append(lambda c: any(line.strip() for line in c.get('source', [])))
for tag_to_drop in drop_tagged_cells:
conditionals.append(lambda c: tag_to_drop not in c.get("metadata", {}).get("tags", []))
conditionals.append(lambda c: tag_to_drop not in c.get('metadata', {}).get('tags', []))

for i, cell in enumerate(_cells(nb, conditionals)):
keep_output_this_cell = determine_keep_output(cell, keep_output, strip_init_cells)

# Remove the outputs, unless directed otherwise
if 'outputs' in cell:

# Default behavior (max_size == 0) strips all outputs.
if not keep_output_this_cell:
cell['outputs'] = [output for output in cell['outputs']
if get_size(output) <= max_size]
cell['outputs'] = [output for output in cell['outputs'] if get_size(output) <= max_size]

# Strip the counts from the outputs that were kept if not keep_count.
if not keep_count:
Expand Down
Loading

0 comments on commit 9e4873d

Please sign in to comment.