
Merge branch 'DEV' into communities
kartikpersistent authored Sep 18, 2024
2 parents 3fecf70 + fec0d1e commit d6b2e6a
Showing 7 changed files with 224 additions and 201 deletions.
4 changes: 3 additions & 1 deletion backend/requirements.txt
@@ -178,4 +178,6 @@ sentence-transformers==3.0.1
 google-cloud-logging==3.10.0
 PyMuPDF==1.24.5
 pypandoc==1.13
-graphdatascience==1.10
+graphdatascience==1.10
+Secweb==1.11.0
+

32 changes: 21 additions & 11 deletions backend/score.py
@@ -27,6 +27,11 @@
 from datetime import datetime, timezone
 import time
 import gc
+from Secweb import SecWeb
+from Secweb.StrictTransportSecurity import HSTS
+from Secweb.ContentSecurityPolicy import ContentSecurityPolicy
+from Secweb.XContentTypeOptions import XContentTypeOptions
+from Secweb.XFrameOptions import XFrame
 
 logger = CustomLogger()
 CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks")
@@ -43,6 +48,11 @@ def sick():
     return False
 
 app = FastAPI()
+SecWeb(app=app, Option={'referrer': False, 'xframe': False})
+app.add_middleware(HSTS, Option={'max-age': 4, 'preload': True})
+app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False)
+app.add_middleware(XContentTypeOptions)
+app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'})
 
 app.add_middleware(
     CORSMiddleware,
@@ -106,7 +116,7 @@ async def create_source_knowledge_graph_url(
 
         message = f"Source Node created successfully for source type: {source_type} and source: {source}"
         json_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-        logger.log_struct(json_obj)
+        logger.log_struct(json_obj, "INFO")
         return create_api_response("Success",message=message,success_count=success_count,failed_count=failed_count,file_name=lst_file_name)
     except Exception as e:
         error_message = str(e)
@@ -194,7 +204,7 @@ async def extract_knowledge_graph_from_file(
         result['wiki_query'] = wiki_query
         result['source_type'] = source_type
         result['logging_time'] = formatted_time(datetime.now(timezone.utc))
-        logger.log_struct({"severity":"INFO","jsonPayload":result})
+        logger.log_struct(result, "INFO")
         extract_api_time = time.time() - start_time
         logging.info(f"extraction completed in {extract_api_time:.2f} seconds for file name {file_name}")
         return create_api_response('Success', data=result, file_source= source_type)
@@ -213,7 +223,7 @@ async def extract_knowledge_graph_from_file(
         logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}')
         delete_uploaded_local_file(merged_file_path,file_name)
         json_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-        logger.log_struct({"severity":"ERROR","jsonPayload":json_obj})
+        logger.log_struct(json_obj, "ERROR")
         logging.exception(f'File Failed in extraction: {json_obj}')
         return create_api_response('Failed', message=message + error_message[:100], error=error_message, file_name = file_name)
     finally:
@@ -230,7 +240,7 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None
         uri = uri.replace(" ","+")
         result = await asyncio.to_thread(get_source_list_from_graph,uri,userName,decoded_password,database)
         json_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-        logger.log_struct({"severity":"INFO","jsonPayload":json_obj})
+        logger.log_struct(json_obj, "INFO")
         return create_api_response("Success",data=result)
     except Exception as e:
         job_status = "Failed"
@@ -293,7 +303,7 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(),
         result["info"]["response_time"] = round(total_call_time, 2)
 
         json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-        logger.log_struct({"severity":"INFO","jsonPayload":json_obj})
+        logger.log_struct(json_obj, "INFO")
 
         return create_api_response('Success',data=result)
     except Exception as e:
@@ -311,7 +321,7 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=F
         logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}")
         result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,chunk_ids=chunk_ids,is_entity=json.loads(is_entity.lower()))
         json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-        logger.log_struct({"severity":"INFO","jsonPayload":json_obj})
+        logger.log_struct(json_obj, "INFO")
         return create_api_response('Success',data=result)
     except Exception as e:
         job_status = "Failed"
@@ -341,7 +351,7 @@ async def graph_query(
             document_names=document_names
         )
         json_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-        logger.log_struct({"severity":"INFO","jsonPayload":json_obj})
+        logger.log_struct(json_obj, "INFO")
         return create_api_response('Success', data=result)
     except Exception as e:
         job_status = "Failed"
@@ -374,7 +384,7 @@ async def connect(uri=Form(), userName=Form(), password=Form(), database=Form())
         graph = create_graph_database_connection(uri, userName, password, database)
         result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph)
         json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-        logger.log_struct({"severity":"INFO","jsonPayload":json_obj})
+        logger.log_struct(json_obj, "INFO")
         return create_api_response('Success',data=result)
     except Exception as e:
         job_status = "Failed"
@@ -391,7 +401,7 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber
         graph = create_graph_database_connection(uri, userName, password, database)
         result = await asyncio.to_thread(upload_file, graph, model, file, chunkNumber, totalChunks, originalname, uri, CHUNK_DIR, MERGED_DIR)
         json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-        logger.log_struct({"severity":"INFO","jsonPayload":json_obj})
+        logger.log_struct(json_obj, "INFO")
         if int(chunkNumber) == int(totalChunks):
             return create_api_response('Success',data=result, message='Source Node Created Successfully')
         else:
@@ -413,7 +423,7 @@ async def get_structured_schema(uri=Form(), userName=Form(), password=Form(), da
         result = await asyncio.to_thread(get_labels_and_relationtypes, graph)
         logging.info(f'Schema result from DB: {result}')
         json_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-        logger.log_struct({"severity":"INFO","jsonPayload":json_obj})
+        logger.log_struct(json_obj, "INFO")
         return create_api_response('Success', data=result)
     except Exception as e:
         message="Unable to get the labels and relationtypes from neo4j database"
@@ -482,7 +492,7 @@ async def delete_document_and_entities(uri=Form(),
         # entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0
         message = f"Deleted {files_list_size} documents with entities from database"
         json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-        logger.log_struct({"severity":"INFO","jsonPayload":json_obj})
+        logger.log_struct(json_obj, "INFO")
         return create_api_response('Success',message=message)
     except Exception as e:
         job_status = "Failed"
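Taken together, the score.py changes do two things: they register Secweb's security-header middleware on the FastAPI app, and they move every structured log onto the new severity-aware log_struct signature (see logger.py below). The following is a minimal sketch, not part of this commit, of how the header middleware shows up in responses; the /health route and the TestClient harness are hypothetical, while the add_middleware calls mirror the diff above:

from fastapi import FastAPI
from fastapi.testclient import TestClient
from Secweb.StrictTransportSecurity import HSTS
from Secweb.XContentTypeOptions import XContentTypeOptions
from Secweb.XFrameOptions import XFrame

app = FastAPI()
# Same registration pattern as in the diff above.
app.add_middleware(HSTS, Option={'max-age': 4, 'preload': True})
app.add_middleware(XContentTypeOptions)
app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'})

@app.get("/health")  # hypothetical route, for illustration only
def health():
    return {"ok": True}

client = TestClient(app)
response = client.get("/health")
print(response.headers.get("strict-transport-security"))  # e.g. max-age=4; preload
print(response.headers.get("x-content-type-options"))     # nosniff
print(response.headers.get("x-frame-options"))            # DENY

One design note: an HSTS max-age of 4 seconds is unusually short; browsers remember the HTTPS-only pin for only that long, and preload lists generally require a max-age of a year or more, so this value reads like a placeholder to be raised before production.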
6 changes: 3 additions & 3 deletions backend/src/logger.py
@@ -11,8 +11,8 @@ def __init__(self):
         else:
             self.logger = None
 
-    def log_struct(self, message):
+    def log_struct(self, message, severity="DEFAULT"):
         if self.is_gcp_log_enabled and message is not None:
-            self.logger.log_struct(message)
+            self.logger.log_struct({"message": message, "severity": severity})
         else:
-            print(message)
+            print(f"[{severity}]{message}")
12 changes: 9 additions & 3 deletions backend/src/main.py
@@ -5,7 +5,8 @@
     QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY,
     START_FROM_BEGINNING,
     START_FROM_LAST_PROCESSED_POSITION,
-    DELETE_ENTITIES_AND_START_FROM_BEGINNING)
+    DELETE_ENTITIES_AND_START_FROM_BEGINNING,
+    QUERY_TO_GET_NODES_AND_RELATIONS_OF_A_DOCUMENT)
 from src.shared.schema_extraction import schema_extraction_from_text
 from langchain_community.document_loaders import GoogleApiClient, GoogleApiYoutubeLoader
 from dotenv import load_dotenv
@@ -341,8 +342,13 @@ def processing_source(uri, userName, password, database, model, file_name, pages
         obj_source_node.updated_at = end_time
         obj_source_node.processing_time = processed_time
         obj_source_node.processed_chunk = select_chunks_upto+select_chunks_with_retry
-        obj_source_node.node_count = node_count
-        obj_source_node.relationship_count = rel_count
+        if retry_condition == START_FROM_BEGINNING:
+            result = graph.query(QUERY_TO_GET_NODES_AND_RELATIONS_OF_A_DOCUMENT, params={"filename":file_name})
+            obj_source_node.node_count = result[0]['nodes']
+            obj_source_node.relationship_count = result[0]['rels']
+        else:
+            obj_source_node.node_count = node_count
+            obj_source_node.relationship_count = rel_count
         graphDb_data_Access.update_source_node(obj_source_node)
 
         result = graphDb_data_Access.get_current_status_document_node(file_name)
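The processing_source change makes the persisted counts retry-aware: with the start_from_beginning retry condition the document's chunks are reprocessed on top of entities that already exist in the graph, so this run's in-memory node_count/rel_count counters would not reflect what is actually stored. Re-reading the totals from the graph keeps the Document node authoritative. A restatement of the new branch with explanatory comments (names as in main.py above; the rationale is inferred, not stated in the commit):

if retry_condition == START_FROM_BEGINNING:
    # Reprocessed from scratch over existing data: trust the graph, not this
    # run's counters, for the totals stored on the Document node.
    result = graph.query(QUERY_TO_GET_NODES_AND_RELATIONS_OF_A_DOCUMENT, params={"filename": file_name})
    obj_source_node.node_count = result[0]['nodes']
    obj_source_node.relationship_count = result[0]['rels']
else:
    # Fresh and resumed runs keep the counts accumulated during extraction.
    obj_source_node.node_count = node_count
    obj_source_node.relationship_count = rel_count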
7 changes: 6 additions & 1 deletion backend/src/shared/constants.py
@@ -375,7 +375,12 @@
 RETURN c.id as id,c.position as position
 ORDER BY c.position LIMIT 1
 """
-
+QUERY_TO_GET_NODES_AND_RELATIONS_OF_A_DOCUMENT = """
+MATCH (d:Document)<-[:PART_OF]-(:Chunk)-[:HAS_ENTITY]->(e) where d.fileName=$filename
+OPTIONAL MATCH (d)<-[:PART_OF]-(:Chunk)-[:HAS_ENTITY]->(e2:!Chunk)-[rel]-(e)
+RETURN count(DISTINCT e) as nodes, count(DISTINCT rel) as rels
+"""
+
 START_FROM_BEGINNING = "start_from_beginning"
 DELETE_ENTITIES_AND_START_FROM_BEGINNING = "delete_entities_and_start_from_beginning"
 START_FROM_LAST_PROCESSED_POSITION = "start_from_last_processed_position"
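Two details of the new query are easy to miss: e2:!Chunk uses Neo4j 5 label-expression syntax to keep Chunk nodes out of the relationship count, and both aggregates are DISTINCT because the same entity can be reached through many chunks. A standalone sketch of running it with the official neo4j Python driver; the connection details and filename are hypothetical, and the project itself goes through its own connection helpers instead:

from neo4j import GraphDatabase

COUNT_QUERY = """
MATCH (d:Document)<-[:PART_OF]-(:Chunk)-[:HAS_ENTITY]->(e) where d.fileName=$filename
OPTIONAL MATCH (d)<-[:PART_OF]-(:Chunk)-[:HAS_ENTITY]->(e2:!Chunk)-[rel]-(e)
RETURN count(DISTINCT e) as nodes, count(DISTINCT rel) as rels
"""

# Hypothetical connection details.
driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "password"))
with driver.session() as session:
    record = session.run(COUNT_QUERY, filename="example.pdf").single()
    print(record["nodes"], record["rels"])  # totals persisted on the Document node
driver.close()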
(Diffs for the remaining 2 of the 7 changed files are not shown.)
