From fcae948968b7b0adce6f49a351d14be4fa34dd6e Mon Sep 17 00:00:00 2001 From: Vismayak Mohanarajan Date: Wed, 26 Jun 2024 15:36:19 -0500 Subject: [PATCH 1/2] First pass at creating an image annotating extractor --- .../extractor_info.json | 34 ++++++++++ .../image_annotation.py | 62 +++++++++++++++++++ .../requirements.txt | 1 + 3 files changed, 97 insertions(+) create mode 100644 sample-extractors/image-annotation-extractor/extractor_info.json create mode 100755 sample-extractors/image-annotation-extractor/image_annotation.py create mode 100644 sample-extractors/image-annotation-extractor/requirements.txt diff --git a/sample-extractors/image-annotation-extractor/extractor_info.json b/sample-extractors/image-annotation-extractor/extractor_info.json new file mode 100644 index 0000000..e155ebf --- /dev/null +++ b/sample-extractors/image-annotation-extractor/extractor_info.json @@ -0,0 +1,34 @@ +{ + "@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld", + "name": "ncsa.image_annotator", + "version": "2.0", + "description": "Saves user image annotations as metadata", + "author": "Vismayak Mohanarajan ", + "contributors": [], + "contexts": [ + ], + "repository": [ + { + "repType": "git", + "repUrl": "https://opensource.ncsa.illinois.edu/stash/scm/cats/pyclowder.git" + } + ], + "process": { + "file": [ + "text/*", + "application/json" + ] + }, + "external_services": [], + "dependencies": [], + "bibtex": [], + "parameters": { + "schema": { + "IMAGE_ANNOTATIONS": { + "type": "string", + "title": "Annotate image", + "format": "ImageAnnotator" + } + } + } +} diff --git a/sample-extractors/image-annotation-extractor/image_annotation.py b/sample-extractors/image-annotation-extractor/image_annotation.py new file mode 100755 index 0000000..621d052 --- /dev/null +++ b/sample-extractors/image-annotation-extractor/image_annotation.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python + +"""Example extractor based on the clowder code.""" + +import logging +import subprocess 
import json
import logging
from typing import Dict

# Third-party dependency; the accompanying requirements.txt pins pyclowder==3.0.7.
from pyclowder.extractors import Extractor
import pyclowder.files


class ImageAnnotator(Extractor):
    """Save user-supplied image annotations as metadata on a file.

    The annotations arrive in the message parameters under the
    ``IMAGE_ANNOTATIONS`` key (see the ``parameters.schema`` section of
    ``extractor_info.json``) and are uploaded unchanged as file metadata.
    """

    def __init__(self):
        Extractor.__init__(self)

        # add any additional arguments to parser
        # self.parser.add_argument('--max', '-m', type=int, nargs='?', default=-1,
        #                          help='maximum number (default=-1)')

        # parse command line and load default logging configuration
        self.setup()

        # setup logging for the extractor
        logging.getLogger('pyclowder').setLevel(logging.DEBUG)
        logging.getLogger('__main__').setLevel(logging.DEBUG)

    @staticmethod
    def _load_annotations(parameters: Dict):
        """Return the decoded ``IMAGE_ANNOTATIONS`` value, or ``None``.

        ``parameters['parameters']`` may be either a JSON document (text) or
        an already-decoded dict; both forms are accepted. The annotation value
        itself may likewise be JSON text or an already-decoded object.

        ``None`` is returned (after logging the reason) when the parameters
        are absent, cannot be decoded, or contain no ``IMAGE_ANNOTATIONS``
        entry, so the caller never sees an unbound or half-parsed value.
        """
        logger = logging.getLogger(__name__)

        if not parameters or 'parameters' not in parameters:
            logger.warning("No parameters received; nothing to annotate")
            return None

        logger.info("Received parameters")
        payload = parameters['parameters']

        # json.loads only accepts text; anything else is used as-is below.
        if isinstance(payload, (str, bytes, bytearray)):
            try:
                payload = json.loads(payload)
            except ValueError as e:  # json.JSONDecodeError subclasses ValueError
                logger.error("Failed to load parameters, it's not compatible "
                             "with json.loads().\nError:%s", e)
                return None

        if not isinstance(payload, dict):
            logger.error("Unsupported parameters payload of type %s",
                         type(payload).__name__)
            return None

        if "IMAGE_ANNOTATIONS" not in payload:
            logger.warning("Parameters contain no IMAGE_ANNOTATIONS entry")
            return None

        image_annotations = payload["IMAGE_ANNOTATIONS"]
        logger.info("Image annotations: %s", image_annotations)

        if isinstance(image_annotations, (str, bytes, bytearray)):
            try:
                return json.loads(image_annotations)
            except ValueError as e:
                logger.error("IMAGE_ANNOTATIONS is not valid JSON: %s", e)
                return None
        return image_annotations

    def process_message(self, connector, host, secret_key, resource, parameters):
        """Upload the annotations carried by ``parameters`` as file metadata.

        Args:
            connector: connector object, passed through to
                ``pyclowder.files.upload_metadata``.
            host: server URL, used for the metadata record and the upload.
            secret_key: API key, passed through to the upload call.
            resource: message resource; only ``resource['id']`` is read.
            parameters: message parameters; the annotations are expected
                under ``parameters['parameters']['IMAGE_ANNOTATIONS']``.

        If no usable annotations are found, the reason is logged and nothing
        is uploaded.
        """
        logger = logging.getLogger(__name__)
        file_id = resource['id']
        logger.debug("Parameters: %s", parameters)

        result = self._load_annotations(parameters)
        if result is None:
            return

        metadata = self.get_metadata(result, 'file', file_id, host)

        # Normal logs will appear in the extractor log, but NOT in the Clowder UI.
        logger.debug(metadata)

        # Upload metadata to original file
        pyclowder.files.upload_metadata(connector, host, secret_key, file_id, metadata)


if __name__ == "__main__":
    extractor = ImageAnnotator()
    extractor.start()