From fcae948968b7b0adce6f49a351d14be4fa34dd6e Mon Sep 17 00:00:00 2001
From: Vismayak Mohanarajan <mohanar2@illinois.edu>
Date: Wed, 26 Jun 2024 15:36:19 -0500
Subject: [PATCH 1/2] First pass at creating an image annotating extractor

---
 .../extractor_info.json                       | 34 ++++++++++
 .../image_annotation.py                       | 62 +++++++++++++++++++
 .../requirements.txt                          |  1 +
 3 files changed, 97 insertions(+)
 create mode 100644 sample-extractors/image-annotation-extractor/extractor_info.json
 create mode 100755 sample-extractors/image-annotation-extractor/image_annotation.py
 create mode 100644 sample-extractors/image-annotation-extractor/requirements.txt

diff --git a/sample-extractors/image-annotation-extractor/extractor_info.json b/sample-extractors/image-annotation-extractor/extractor_info.json
new file mode 100644
index 0000000..e155ebf
--- /dev/null
+++ b/sample-extractors/image-annotation-extractor/extractor_info.json
@@ -0,0 +1,34 @@
+{
+  "@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
+  "name": "ncsa.image_annotator",
+  "version": "2.0",
+  "description": "Saves user image annotations as metadata",
+  "author": "Vismayak Mohanarajan <mohanar2@illinois.edu>",
+  "contributors": [],
+  "contexts": [
+  ],
+  "repository": [
+    {
+      "repType": "git",
+      "repUrl": "https://opensource.ncsa.illinois.edu/stash/scm/cats/pyclowder.git"
+    }
+  ],
+  "process": {
+    "file": [
+      "text/*",
+      "application/json"
+    ]
+  },
+  "external_services": [],
+  "dependencies": [],
+  "bibtex": [],
+  "parameters": {
+    "schema": {
+      "IMAGE_ANNOTATIONS": {
+        "type": "string",
+        "title": "Annotate image",
+        "format": "ImageAnnotator"
+      }
+    }
+  }
+}
diff --git a/sample-extractors/image-annotation-extractor/image_annotation.py b/sample-extractors/image-annotation-extractor/image_annotation.py
new file mode 100755
index 0000000..621d052
--- /dev/null
+++ b/sample-extractors/image-annotation-extractor/image_annotation.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+
+"""Sample extractor that saves user-supplied image annotations as Clowder file metadata."""
+
+import logging
+import subprocess
+import json
+from typing import Dict
+
+from pyclowder.extractors import Extractor
+import pyclowder.files
+
+
+class WordCount(Extractor):
+    """Count the number of characters, words and lines in a text file."""
+    def __init__(self):
+        Extractor.__init__(self)
+        # add any additional arguments to parser
+        # self.parser.add_argument('--max', '-m', type=int, nargs='?', default=-1,
+        #                          help='maximum number (default=-1)')
+
+        # parse command line and load default logging configuration
+        self.setup()
+        # setup logging for the extractor
+        logging.getLogger('pyclowder').setLevel(logging.DEBUG)
+        logging.getLogger('__main__').setLevel(logging.DEBUG)
+
+    def process_message(self, connector, host, secret_key, resource, parameters):
+        """Upload the IMAGE_ANNOTATIONS parameter, if present, as file metadata.
+
+        ``parameters['parameters']`` may be a JSON string or an already-decoded dict.
+        """
+        logger = logging.getLogger(__name__)
+        inputfile = resource["local_paths"][0]
+        file_id = resource['id']
+
+        if 'parameters' in parameters:
+            params = None
+            logger.info("Received parameters")
+            raw_params = parameters['parameters']
+            if isinstance(raw_params, dict):
+                params = raw_params  # already decoded, nothing to parse
+            else:
+                try:
+                    params = json.loads(raw_params)
+                except (TypeError, ValueError) as e:
+                    logger.error("Could not decode parameters as JSON: %s", e)
+        else:
+            params = None  # no user parameters were submitted with this message
+        if isinstance(params, dict) and "IMAGE_ANNOTATIONS" in params:
+            image_annotations = params["IMAGE_ANNOTATIONS"]
+            logger.info("Image annotations: %s", image_annotations)
+            # extractor_info.json declares a string; tolerate an already-decoded value
+            is_text = isinstance(image_annotations, str)
+            result = json.loads(image_annotations) if is_text else image_annotations
+            metadata = self.get_metadata(result, 'file', file_id, host)
+            logger.debug(metadata)  # extractor log only, NOT shown in the Clowder UI
+            pyclowder.files.upload_metadata(connector, host, secret_key, file_id, metadata)
+
+if __name__ == "__main__":
+    extractor = WordCount()
+    extractor.start()
diff --git a/sample-extractors/image-annotation-extractor/requirements.txt b/sample-extractors/image-annotation-extractor/requirements.txt
new file mode 100644
index 0000000..7c54c75
--- /dev/null
+++ b/sample-extractors/image-annotation-extractor/requirements.txt
@@ -0,0 +1 @@
+pyclowder==3.0.7

From 7aadc3c05925ab2c9e6cb8bf2e4167c07c795d13 Mon Sep 17 00:00:00 2001
From: Vismayak Mohanarajan <mohanar2@illinois.edu>
Date: Tue, 27 Aug 2024 14:45:17 -0500
Subject: [PATCH 2/2] Update image_annotation.py

---
 .../image-annotation-extractor/image_annotation.py           | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sample-extractors/image-annotation-extractor/image_annotation.py b/sample-extractors/image-annotation-extractor/image_annotation.py
index 621d052..806844c 100755
--- a/sample-extractors/image-annotation-extractor/image_annotation.py
+++ b/sample-extractors/image-annotation-extractor/image_annotation.py
@@ -11,7 +11,7 @@
 import pyclowder.files
 
 
-class WordCount(Extractor):
+class ImageAnnotator(Extractor):
     """Count the number of characters, words and lines in a text file."""
     def __init__(self):
         Extractor.__init__(self)
@@ -33,6 +33,7 @@ def process_message(self, connector, host, secret_key, resource, parameters):
         logger = logging.getLogger(__name__)
         inputfile = resource["local_paths"][0]
         file_id = resource['id']
+        print(f"Parameters: {parameters}")
 
         if 'parameters' in parameters:
             params = None
@@ -58,5 +59,5 @@ def process_message(self, connector, host, secret_key, resource, parameters):
             pyclowder.files.upload_metadata(connector, host, secret_key, file_id, metadata)
 
 if __name__ == "__main__":
-    extractor = WordCount()
+    extractor = ImageAnnotator()
     extractor.start()