diff --git a/app/tasks/html_crawl/main.py b/app/tasks/html_crawl/main.py index c3d5b5d..0211245 100644 --- a/app/tasks/html_crawl/main.py +++ b/app/tasks/html_crawl/main.py @@ -40,14 +40,23 @@ def get_html_crawl(self, crawl_id): # We start the crawl in a separate process so each # crawl creates its own Twisted reactor - process = Process( - target=start_crawl, - kwargs={"html_crawl": html_crawl, "url": crawl.url, "crawl_id": crawl.id}) - process.start() - process.join(120) # Wait 120 seconds for the crawler to finish - if process.is_alive(): - logger.error( - "Crawler timed out, the crawl may not contain enough pages") - process.terminate() - process.join() + try: + process = Process( + target=start_crawl, + kwargs={"html_crawl": html_crawl, + "url": crawl.url, "crawl_id": crawl.id} + ) + process.start() + process.join(180) # Wait 180 seconds for the crawler to finish + if process.is_alive(): + logger.error( + "Crawler timed out, the crawl may not contain enough pages") + process.terminate() + process.join() + + except Exception as e: + logger.error(f"Error while crawling html files: {e}") + html_crawl.update(status=ProcessStatus.ERROR, task_id=self.request.id) + crawls.update_task(crawl_id=crawl.id, + task_name="html_crawl", task=html_crawl) return diff --git a/tests/tests_services/__init__.py b/tests/tests_services/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/tests_services/test_carbon_calculator.py b/tests/tests_services/test_carbon_calculator.py deleted file mode 100644 index e836855..0000000 --- a/tests/tests_services/test_carbon_calculator.py +++ /dev/null @@ -1,67 +0,0 @@ -import unittest -from unittest.mock import patch, Mock - -import requests - -from app.services.carbon_calculator import CarbonCalculator, CarbonCalculatorError - - -class TestCarbonCalculator(unittest.TestCase): - - def setUp(self): - self.calculator = CarbonCalculator() - - def test_empty_url_raises_value_error(self): - with self.assertRaises(ValueError, 
msg="URL cannot be empty."): - self.calculator.get_carbon_footprint("") - - @patch('app.services.carbon_calculator.requests.get') - def test_valid_request_returns_json(self, mock_get): - mock_response = Mock() - mock_response.json.return_value = {"result": "success"} - mock_response.raise_for_status.return_value = None - mock_get.return_value = mock_response - - url = "https://example.com" - result = self.calculator.get_carbon_footprint(url) - self.assertEqual(result, {"result": "success"}) - - @patch('app.services.carbon_calculator.requests.get') - def test_request_exception_raises_carbon_calculator_error(self, mock_get): - mock_get.side_effect = requests.RequestException("Request error") - - with self.assertRaisesRegex(CarbonCalculatorError, "Request to Carbon Calculator API failed: Request error"): - self.calculator.get_carbon_footprint("https://example.com") - - @patch('app.services.carbon_calculator.requests.get') - def test_invalid_json_raises_carbon_calculator_error(self, mock_get): - mock_response = Mock() - mock_response.json.side_effect = ValueError("Invalid JSON") - mock_response.raise_for_status.return_value = None - mock_get.return_value = mock_response - - with self.assertRaisesRegex(CarbonCalculatorError, "Failed to decode API response: Invalid JSON"): - self.calculator.get_carbon_footprint("https://example.com") - - @patch('app.services.carbon_calculator.requests.get') - def test_http_error_raises_carbon_calculator_error(self, mock_get): - mock_response = Mock() - mock_response.raise_for_status.side_effect = requests.HTTPError( - "404 Not Found") - mock_get.return_value = mock_response - - with self.assertRaisesRegex(CarbonCalculatorError, "Request to Carbon Calculator API failed: 404 Not Found"): - self.calculator.get_carbon_footprint("https://example.com") - - @patch('app.services.carbon_calculator.requests.get') - def test_timeout_error_raises_carbon_calculator_error(self, mock_get): - mock_get.side_effect = requests.Timeout("Request timed out") 
- - with self.assertRaisesRegex(CarbonCalculatorError, "Request to Carbon Calculator API failed: Request timed out"): - self.calculator.get_carbon_footprint("https://example.com") - - # Optionally, you could add more tests for other exceptions raised by the requests library or other scenarios. - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tests_services/test_lighthouse_calculator.py b/tests/tests_services/test_lighthouse_calculator.py deleted file mode 100644 index f63c317..0000000 --- a/tests/tests_services/test_lighthouse_calculator.py +++ /dev/null @@ -1,33 +0,0 @@ -import json -import unittest -from unittest.mock import patch, Mock - -from app.services.lighthouse_calculator import ( - LighthouseCalculator, - LighthouseError, -) - - -class TestLighthouseCalculator(unittest.TestCase): - @patch("subprocess.run") - def test_get_lighthouse(self, mock_run): - # Mock a lighthouse response - mock_response = {"categories": {"accessibility": {"score": 100}}} - mock_run.return_value = Mock( - stdout=json.dumps(mock_response).encode("utf-8") - ) - wrapper = LighthouseCalculator() - result = wrapper.get_lighthouse(url="http://example.com") - self.assertEqual( - result, {"categories": {"accessibility": {"score": 100}}}) - - @patch("subprocess.run") - def test_get_lighthouse_error(self, mock_run): - mock_run.side_effect = LighthouseError - wrapper = LighthouseCalculator() - with self.assertRaises(LighthouseError): - wrapper.get_lighthouse(url="http://example.com") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tests_services/test_technologies_calculator.py b/tests/tests_services/test_technologies_calculator.py deleted file mode 100644 index 8cf2c2d..0000000 --- a/tests/tests_services/test_technologies_calculator.py +++ /dev/null @@ -1,68 +0,0 @@ -import json -import unittest -from unittest.mock import patch, Mock - -from app.services.technologies_calculator import ( - TechnologiesCalculator, - TechnologiesError, -) - - -class 
TestTechnologiesCalculator(unittest.TestCase): - @patch("subprocess.run") - def test_get_technologies_success(self, mock_run): - # Setup - mock_result = Mock() - mock_result.stdout = json.dumps( - { - "technologies": [ - {"name": "Tech1", "confidence": 100}, - {"name": "Tech2", "confidence": 50}, - ] - } - ).encode() - mock_run.return_value = mock_result - - calc = TechnologiesCalculator() - - # Execution - result = calc.get_technologies("http://example.com") - - # Assertion - self.assertEqual(result, [{"name": "Tech1", "confidence": 100}]) - - @patch("subprocess.run") - def test_get_technologies_error(self, mock_run): - # Setup - mock_run.side_effect = Exception("Error running subprocess") - - calc = TechnologiesCalculator() - - # Execution & Assertion - with self.assertRaises(TechnologiesError): - calc.get_technologies("http://example.com") - - def test__be_agnostic(self): - # Setup - calc = TechnologiesCalculator() - input_data = [ - {"name": "Tech1", "confidence": 100}, - {"name": "Tech2", "confidence": 50}, - {"name": "Tech3", "confidence": 100}, - ] - - # Execution - result = calc._be_agnostic(input_data) - - # Assertion - self.assertEqual( - result, - [ - {"name": "Tech1", "confidence": 100}, - {"name": "Tech3", "confidence": 100}, - ], - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tests_services/test_url_cleaner.py b/tests/tests_services/test_url_cleaner.py deleted file mode 100644 index af8dc1a..0000000 --- a/tests/tests_services/test_url_cleaner.py +++ /dev/null @@ -1,28 +0,0 @@ -import unittest - -from app.services.url_cleaner import clean_url - - -class TestCleanUrl(unittest.TestCase): - def test_clean_url_no_changes_needed(self): - self.assertEqual(clean_url("http://example.com"), "http://example.com") - - def test_clean_url_remove_trailing_slash(self): - self.assertEqual(clean_url("http://example.com/"), "http://example.com") - - def test_clean_url_remove_spaces(self): - self.assertEqual(clean_url("http://exa mple.com"), 
"http://example.com") - - def test_clean_url_encode_decode(self): - # This test is essentially verifying that encoding and immediately decoding a string does not change it. - # The functionality might seem redundant, but it is present in your provided function. - self.assertEqual(clean_url("http://example.com"), "http://example.com") - - def test_clean_url_combined(self): - self.assertEqual( - clean_url("http://exa mple.com/ "), "http://example.com" - ) - - -if __name__ == "__main__": - unittest.main()