diff --git a/app/tasks/html_crawl/main.py b/app/tasks/html_crawl/main.py
index c3d5b5d..0211245 100644
--- a/app/tasks/html_crawl/main.py
+++ b/app/tasks/html_crawl/main.py
@@ -40,14 +40,23 @@ def get_html_crawl(self, crawl_id):
# We start the crawl in a separate process so each
# crawl creates its own Twisted reactor
- process = Process(
- target=start_crawl,
- kwargs={"html_crawl": html_crawl, "url": crawl.url, "crawl_id": crawl.id})
- process.start()
- process.join(120) # Wait 120 seconds for the crawler to finish
- if process.is_alive():
- logger.error(
- "Crawler timed out, the crawl may not contain enough pages")
- process.terminate()
- process.join()
+ try:
+ process = Process(
+ target=start_crawl,
+ kwargs={"html_crawl": html_crawl,
+ "url": crawl.url, "crawl_id": crawl.id}
+ )
+ process.start()
+ process.join(180) # Wait 180 seconds for the crawler to finish
+ if process.is_alive():
+ logger.error(
+ "Crawler timed out, the crawl may not contain enough pages")
+ process.terminate()
+ process.join()
+
+ except Exception as e:
+ logger.error(f"Error while crawling html files: {e}")
+ html_crawl.update(status=ProcessStatus.ERROR, task_id=self.request.id)
+ crawls.update_task(crawl_id=crawl.id,
+ task_name="html_crawl", task=html_crawl)
return
diff --git a/tests/tests_services/__init__.py b/tests/tests_services/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/tests_services/test_carbon_calculator.py b/tests/tests_services/test_carbon_calculator.py
deleted file mode 100644
index e836855..0000000
--- a/tests/tests_services/test_carbon_calculator.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import unittest
-from unittest.mock import patch, Mock
-
-import requests
-
-from app.services.carbon_calculator import CarbonCalculator, CarbonCalculatorError
-
-
-class TestCarbonCalculator(unittest.TestCase):
-
- def setUp(self):
- self.calculator = CarbonCalculator()
-
- def test_empty_url_raises_value_error(self):
- with self.assertRaises(ValueError, msg="URL cannot be empty."):
- self.calculator.get_carbon_footprint("")
-
- @patch('app.services.carbon_calculator.requests.get')
- def test_valid_request_returns_json(self, mock_get):
- mock_response = Mock()
- mock_response.json.return_value = {"result": "success"}
- mock_response.raise_for_status.return_value = None
- mock_get.return_value = mock_response
-
- url = "https://example.com"
- result = self.calculator.get_carbon_footprint(url)
- self.assertEqual(result, {"result": "success"})
-
- @patch('app.services.carbon_calculator.requests.get')
- def test_request_exception_raises_carbon_calculator_error(self, mock_get):
- mock_get.side_effect = requests.RequestException("Request error")
-
- with self.assertRaisesRegex(CarbonCalculatorError, "Request to Carbon Calculator API failed: Request error"):
- self.calculator.get_carbon_footprint("https://example.com")
-
- @patch('app.services.carbon_calculator.requests.get')
- def test_invalid_json_raises_carbon_calculator_error(self, mock_get):
- mock_response = Mock()
- mock_response.json.side_effect = ValueError("Invalid JSON")
- mock_response.raise_for_status.return_value = None
- mock_get.return_value = mock_response
-
- with self.assertRaisesRegex(CarbonCalculatorError, "Failed to decode API response: Invalid JSON"):
- self.calculator.get_carbon_footprint("https://example.com")
-
- @patch('app.services.carbon_calculator.requests.get')
- def test_http_error_raises_carbon_calculator_error(self, mock_get):
- mock_response = Mock()
- mock_response.raise_for_status.side_effect = requests.HTTPError(
- "404 Not Found")
- mock_get.return_value = mock_response
-
- with self.assertRaisesRegex(CarbonCalculatorError, "Request to Carbon Calculator API failed: 404 Not Found"):
- self.calculator.get_carbon_footprint("https://example.com")
-
- @patch('app.services.carbon_calculator.requests.get')
- def test_timeout_error_raises_carbon_calculator_error(self, mock_get):
- mock_get.side_effect = requests.Timeout("Request timed out")
-
- with self.assertRaisesRegex(CarbonCalculatorError, "Request to Carbon Calculator API failed: Request timed out"):
- self.calculator.get_carbon_footprint("https://example.com")
-
- # Optionally, you could add more tests for other exceptions raised by the requests library or other scenarios.
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/tests_services/test_lighthouse_calculator.py b/tests/tests_services/test_lighthouse_calculator.py
deleted file mode 100644
index f63c317..0000000
--- a/tests/tests_services/test_lighthouse_calculator.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import json
-import unittest
-from unittest.mock import patch, Mock
-
-from app.services.lighthouse_calculator import (
- LighthouseCalculator,
- LighthouseError,
-)
-
-
-class TestLighthouseCalculator(unittest.TestCase):
- @patch("subprocess.run")
- def test_get_lighthouse(self, mock_run):
- # Mock a lighthouse response
- mock_response = {"categories": {"accessibility": {"score": 100}}}
- mock_run.return_value = Mock(
- stdout=json.dumps(mock_response).encode("utf-8")
- )
- wrapper = LighthouseCalculator()
- result = wrapper.get_lighthouse(url="http://example.com")
- self.assertEqual(
- result, {"categories": {"accessibility": {"score": 100}}})
-
- @patch("subprocess.run")
- def test_get_lighthouse_error(self, mock_run):
- mock_run.side_effect = LighthouseError
- wrapper = LighthouseCalculator()
- with self.assertRaises(LighthouseError):
- wrapper.get_lighthouse(url="http://example.com")
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/tests_services/test_technologies_calculator.py b/tests/tests_services/test_technologies_calculator.py
deleted file mode 100644
index 8cf2c2d..0000000
--- a/tests/tests_services/test_technologies_calculator.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import json
-import unittest
-from unittest.mock import patch, Mock
-
-from app.services.technologies_calculator import (
- TechnologiesCalculator,
- TechnologiesError,
-)
-
-
-class TestTechnologiesCalculator(unittest.TestCase):
- @patch("subprocess.run")
- def test_get_technologies_success(self, mock_run):
- # Setup
- mock_result = Mock()
- mock_result.stdout = json.dumps(
- {
- "technologies": [
- {"name": "Tech1", "confidence": 100},
- {"name": "Tech2", "confidence": 50},
- ]
- }
- ).encode()
- mock_run.return_value = mock_result
-
- calc = TechnologiesCalculator()
-
- # Execution
- result = calc.get_technologies("http://example.com")
-
- # Assertion
- self.assertEqual(result, [{"name": "Tech1", "confidence": 100}])
-
- @patch("subprocess.run")
- def test_get_technologies_error(self, mock_run):
- # Setup
- mock_run.side_effect = Exception("Error running subprocess")
-
- calc = TechnologiesCalculator()
-
- # Execution & Assertion
- with self.assertRaises(TechnologiesError):
- calc.get_technologies("http://example.com")
-
- def test__be_agnostic(self):
- # Setup
- calc = TechnologiesCalculator()
- input_data = [
- {"name": "Tech1", "confidence": 100},
- {"name": "Tech2", "confidence": 50},
- {"name": "Tech3", "confidence": 100},
- ]
-
- # Execution
- result = calc._be_agnostic(input_data)
-
- # Assertion
- self.assertEqual(
- result,
- [
- {"name": "Tech1", "confidence": 100},
- {"name": "Tech3", "confidence": 100},
- ],
- )
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/tests_services/test_url_cleaner.py b/tests/tests_services/test_url_cleaner.py
deleted file mode 100644
index af8dc1a..0000000
--- a/tests/tests_services/test_url_cleaner.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import unittest
-
-from app.services.url_cleaner import clean_url
-
-
-class TestCleanUrl(unittest.TestCase):
- def test_clean_url_no_changes_needed(self):
- self.assertEqual(clean_url("http://example.com"), "http://example.com")
-
- def test_clean_url_remove_trailing_slash(self):
- self.assertEqual(clean_url("http://example.com/"), "http://example.com")
-
- def test_clean_url_remove_spaces(self):
- self.assertEqual(clean_url("http://exa mple.com"), "http://example.com")
-
- def test_clean_url_encode_decode(self):
- # This test is essentially verifying that encoding and immediately decoding a string does not change it.
- # The functionality might seem redundant, but it is present in your provided function.
- self.assertEqual(clean_url("http://example.com"), "http://example.com")
-
- def test_clean_url_combined(self):
- self.assertEqual(
- clean_url("http://exa mple.com/ "), "http://example.com"
- )
-
-
-if __name__ == "__main__":
- unittest.main()