Merge pull request #1031 from kga245/feature/unified-logs-handler
Feature: unified logs handler
ElishaKay authored Dec 21, 2024
2 parents d37418a + e1535bf commit 9c55fce
Showing 21 changed files with 785 additions and 153 deletions.
6 changes: 5 additions & 1 deletion .gitignore
@@ -40,4 +40,8 @@ docs/build
 package-lock.json
 
 #Vim swp files
-*.swp
+*.swp
+
+# Log files
+logs/
+*.orig
16 changes: 16 additions & 0 deletions backend/server/app.py
@@ -0,0 +1,16 @@
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import logging
+
+logger = logging.getLogger(__name__)
+
+app = FastAPI()
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, replace with your frontend domain
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
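The CORS setup above ships with `allow_origins=["*"]`, which the inline comment flags as development-only. A minimal sketch of a production-leaning variant, assuming a single known frontend origin (the env-var name and default URL below are illustrative, not part of this commit):

```python
import os

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()

# Hypothetical: read the trusted origin from the environment.
frontend_origin = os.getenv("FRONTEND_ORIGIN", "http://localhost:3000")

app.add_middleware(
    CORSMiddleware,
    allow_origins=[frontend_origin],  # one trusted origin instead of "*"
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
```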
83 changes: 83 additions & 0 deletions backend/server/logging_config.py
@@ -0,0 +1,83 @@
+import logging
+import json
+import os
+from datetime import datetime
+from pathlib import Path
+
+class JSONResearchHandler:
+    def __init__(self, json_file):
+        self.json_file = json_file
+        self.research_data = {
+            "timestamp": datetime.now().isoformat(),
+            "events": [],
+            "content": {
+                "query": "",
+                "sources": [],
+                "context": [],
+                "report": "",
+                "costs": 0.0
+            }
+        }
+
+    def log_event(self, event_type: str, data: dict):
+        self.research_data["events"].append({
+            "timestamp": datetime.now().isoformat(),
+            "type": event_type,
+            "data": data
+        })
+        self._save_json()
+
+    def update_content(self, key: str, value):
+        self.research_data["content"][key] = value
+        self._save_json()
+
+    def _save_json(self):
+        with open(self.json_file, 'w') as f:
+            json.dump(self.research_data, f, indent=2)
+
+def setup_research_logging():
+    # Create logs directory if it doesn't exist
+    logs_dir = Path("logs")
+    logs_dir.mkdir(exist_ok=True)
+
+    # Generate timestamp for log files
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+    # Create log file paths
+    log_file = logs_dir / f"research_{timestamp}.log"
+    json_file = logs_dir / f"research_{timestamp}.json"
+
+    # Configure file handler for research logs
+    file_handler = logging.FileHandler(log_file)
+    file_handler.setLevel(logging.INFO)
+    file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+
+    # Get research logger and configure it
+    research_logger = logging.getLogger('research')
+    research_logger.setLevel(logging.INFO)
+
+    # Remove any existing handlers to avoid duplicates
+    research_logger.handlers.clear()
+
+    # Add file handler
+    research_logger.addHandler(file_handler)
+
+    # Add stream handler for console output
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+    research_logger.addHandler(console_handler)
+
+    # Prevent propagation to root logger to avoid duplicate logs
+    research_logger.propagate = False
+
+    # Create JSON handler
+    json_handler = JSONResearchHandler(json_file)
+
+    return str(log_file), str(json_file), research_logger, json_handler
+
+# Create a function to get the logger and JSON handler
+def get_research_logger():
+    return logging.getLogger('research')
+
+def get_json_handler():
+    return getattr(logging.getLogger('research'), 'json_handler', None)
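Taken together, `setup_research_logging` wires a `research` logger to a timestamped `.log` file plus the console, and returns a separate `JSONResearchHandler` for structured output. A short usage sketch, assuming the repo root is on `sys.path` so the import resolves; attaching the handler to the logger mirrors what `server.py` does at startup below:

```python
from backend.server.logging_config import (
    setup_research_logging,
    get_research_logger,
    get_json_handler,
)

log_file, json_file, research_logger, json_handler = setup_research_logging()
research_logger.json_handler = json_handler  # expose it for get_json_handler()

research_logger.info("research started")               # -> .log file and console
json_handler.log_event("status", {"step": "started"})  # -> events list in the .json file
json_handler.update_content("query", "example query")  # -> content section of the .json file

assert get_research_logger() is research_logger
assert get_json_handler() is json_handler
```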
26 changes: 26 additions & 0 deletions backend/server/server.py
@@ -15,6 +15,26 @@
     execute_multi_agents, handle_websocket_communication
 )
 
+from gpt_researcher.utils.logging_config import setup_research_logging
+
+import logging
+
+# Get logger instance
+logger = logging.getLogger(__name__)
+
+# Don't override parent logger settings
+logger.propagate = True
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[
+        logging.FileHandler("server_log.txt"),  # Log to file
+        logging.StreamHandler()  # Also print to console
+    ]
+)
+
+
 # Models
 
 
@@ -73,6 +93,12 @@ def startup_event():
     os.makedirs("outputs", exist_ok=True)
     app.mount("/outputs", StaticFiles(directory="outputs"), name="outputs")
     os.makedirs(DOC_PATH, exist_ok=True)
+
+    # Setup research logging
+    log_file, json_file, research_logger, json_handler = setup_research_logging()  # Unpack all 4 values
+    research_logger.json_handler = json_handler  # Store the JSON handler on the logger
+    research_logger.info(f"Research log file: {log_file}")
+    research_logger.info(f"Research JSON file: {json_file}")
 
 # Routes
 
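Because startup stores the JSON handler as an attribute of the shared `research` logger, downstream code can reach both sinks without threading handles through every call site. A sketch of that pattern (the `record_cost` helper is hypothetical, and it assumes the `gpt_researcher.utils.logging_config` module imported above, which is not among the rendered files, exposes the same `get_json_handler` shown in `backend/server/logging_config.py`):

```python
import logging

from gpt_researcher.utils.logging_config import get_json_handler  # assumed helper

research_logger = logging.getLogger("research")

def record_cost(cost: float) -> None:
    """Hypothetical helper: record a cost update in both log sinks."""
    research_logger.info(f"research costs: {cost}")
    json_handler = get_json_handler()
    if json_handler is not None:  # None until setup_research_logging() has run
        json_handler.update_content("costs", cost)
```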
127 changes: 123 additions & 4 deletions backend/server/server_utils.py
@@ -4,14 +4,115 @@
 import time
 import shutil
 from typing import Dict, List, Any
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, FileResponse
 from gpt_researcher.document.document import DocumentLoader
+# Add this import
 from backend.utils import write_md_to_pdf, write_md_to_word, write_text_to_md
+from pathlib import Path
+from datetime import datetime
+from fastapi import HTTPException
+import logging
 
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
 
+class CustomLogsHandler:
+    """Custom handler to capture streaming logs from the research process"""
+    def __init__(self, websocket, task: str):
+        self.logs = []
+        self.websocket = websocket
+        sanitized_filename = sanitize_filename(f"task_{int(time.time())}_{task}")
+        self.log_file = os.path.join("outputs", f"{sanitized_filename}.json")
+        self.timestamp = datetime.now().isoformat()
+        # Initialize log file with metadata
+        os.makedirs("outputs", exist_ok=True)
+        with open(self.log_file, 'w') as f:
+            json.dump({
+                "timestamp": self.timestamp,
+                "events": [],
+                "content": {
+                    "query": "",
+                    "sources": [],
+                    "context": [],
+                    "report": "",
+                    "costs": 0.0
+                }
+            }, f, indent=2)
+
+    async def send_json(self, data: Dict[str, Any]) -> None:
+        """Store log data and send to websocket"""
+        # Send to websocket for real-time display
+        if self.websocket:
+            await self.websocket.send_json(data)
+
+        # Read current log file
+        with open(self.log_file, 'r') as f:
+            log_data = json.load(f)
+
+        # Update appropriate section based on data type
+        if data.get('type') == 'logs':
+            log_data['events'].append({
+                "timestamp": datetime.now().isoformat(),
+                "type": "event",
+                "data": data
+            })
+        else:
+            # Update content section for other types of data
+            log_data['content'].update(data)
+
+        # Save updated log file
+        with open(self.log_file, 'w') as f:
+            json.dump(log_data, f, indent=2)
+        logger.debug(f"Log entry written to: {self.log_file}")
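`CustomLogsHandler` intentionally exposes the same `send_json` coroutine as a FastAPI websocket, so the researcher can stream to it as if it were the socket while every payload is also persisted under `outputs/`. A minimal sketch of driving it directly, with no real websocket attached (passing `None` skips the send and only writes the file):

```python
import asyncio

async def demo() -> None:
    handler = CustomLogsHandler(websocket=None, task="demo query")
    await handler.send_json({"type": "logs", "output": "fetching sources..."})  # appended to events
    await handler.send_json({"report": "# Findings\n..."})  # merged into content
    print(f"log written to {handler.log_file}")

asyncio.run(demo())
```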


+class Researcher:
+    def __init__(self, query: str, report_type: str = "research_report"):
+        self.query = query
+        self.report_type = report_type
+        # Generate unique ID for this research task
+        self.research_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{hash(query)}"
+        # Initialize logs handler with research ID
+        self.logs_handler = CustomLogsHandler(self.research_id)
+        self.researcher = GPTResearcher(
+            query=query,
+            report_type=report_type,
+            websocket=self.logs_handler
+        )
+
+    async def research(self) -> dict:
+        """Conduct research and return paths to generated files"""
+        await self.researcher.conduct_research()
+        report = await self.researcher.write_report()
+
+        # Generate the files
+        sanitized_filename = sanitize_filename(f"task_{int(time.time())}_{self.query}")
+        file_paths = await generate_report_files(report, sanitized_filename)
+
+        # Get the JSON log path that was created by CustomLogsHandler
+        json_relative_path = os.path.relpath(self.logs_handler.log_file)
+
+        return {
+            "output": {
+                **file_paths,  # Include PDF, DOCX, and MD paths
+                "json": json_relative_path
+            }
+        }
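Note that as committed, `Researcher.__init__` builds its handler with `CustomLogsHandler(self.research_id)`, a single positional argument, while the handler's signature is `(websocket, task)`; calling it this way would raise a `TypeError`. A sketch of the apparent intent, under the assumption that no websocket is wanted and the query serves as the task label:

```python
import asyncio

# Hypothetical fix inside Researcher.__init__ (not what the commit does):
#     self.logs_handler = CustomLogsHandler(websocket=None, task=query)

async def run_example() -> None:
    researcher = Researcher("impact of unified research logging")
    result = await researcher.research()
    print(result["output"]["json"])  # relative path to the JSON log file

asyncio.run(run_example())
```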

 def sanitize_filename(filename: str) -> str:
-    return re.sub(r"[^\w\s-]", "", filename).strip()
+    # Split into components
+    prefix, timestamp, *task_parts = filename.split('_')
+    task = '_'.join(task_parts)
+
+    # Calculate max length for task portion
+    # 255 - len("outputs/") - len("task_") - len(timestamp) - len("_.json") - safety_margin
+    max_task_length = 255 - 8 - 5 - 10 - 6 - 10  # ~216 chars for task
+
+    # Truncate task if needed
+    truncated_task = task[:max_task_length] if len(task) > max_task_length else task
+
+    # Reassemble and clean the filename
+    sanitized = f"{prefix}_{timestamp}_{truncated_task}"
+    return re.sub(r"[^\w\s-]", "", sanitized).strip()
 
 
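The rewritten `sanitize_filename` assumes the `task_<timestamp>_<task>` shape its callers produce and budgets the task portion so the final path under `outputs/` stays within a typical 255-character filename limit. A worked example (values illustrative):

```python
name = sanitize_filename(f"task_{1703123456}_how do unified logs work?")
print(name)  # -> 'task_1703123456_how do unified logs work'
# The trailing '?' is stripped by the final re.sub; a task longer than
# 216 characters would be truncated before reassembly.
```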
@@ -23,13 +124,31 @@ async def handle_start_command(websocket, data: str, manager):
         print("Error: Missing task or report_type")
         return
 
+    # Create logs handler with websocket and task
+    logs_handler = CustomLogsHandler(websocket, task)
+    # Initialize log content with query
+    await logs_handler.send_json({
+        "query": task,
+        "sources": [],
+        "context": [],
+        "report": ""
+    })
+
     sanitized_filename = sanitize_filename(f"task_{int(time.time())}_{task}")
 
     report = await manager.start_streaming(
-        task, report_type, report_source, source_urls, tone, websocket, headers
+        task,
+        report_type,
+        report_source,
+        source_urls,
+        tone,
+        logs_handler,
+        headers
     )
     report = str(report)
     file_paths = await generate_report_files(report, sanitized_filename)
+    # Add JSON log path to file_paths
+    file_paths["json"] = os.path.relpath(logs_handler.log_file)
     await send_file_paths(websocket, file_paths)
 
 
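After streaming finishes, the client receives the generated artifact paths over the websocket, now including the JSON log. A sketch of the payload shape `send_file_paths` delivers, with key names inferred from the `write_md_to_pdf` / `write_md_to_word` / `write_text_to_md` imports plus the `json` entry added here (an assumption, since `generate_report_files` is not shown in this diff):

```python
file_paths = {
    "pdf": "outputs/task_1703123456_demo query.pdf",
    "docx": "outputs/task_1703123456_demo query.docx",
    "md": "outputs/task_1703123456_demo query.md",
    "json": "outputs/task_1703123456_demo query.json",  # added by this commit
}
```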
12 changes: 5 additions & 7 deletions frontend/index.html
@@ -143,13 +143,11 @@ <h2>Research Report</h2>
       <div id="reportContainer"></div>
       <div id="reportActions">
         <div class="alert alert-info" role="alert" id="status"></div>
-        <a id="copyToClipboard" onclick="GPTResearcher.copyToClipboard()" class="btn btn-secondary mt-3"
-          style="margin-right: 10px;">Copy to clipboard (markdown)</a>
-        <a id="downloadLinkMd" href="#" class="btn btn-secondary mt-3" style="margin-right: 10px;"
-          target="_blank">Download as Markdown</a>
-        <a id="downloadLink" href="#" class="btn btn-secondary mt-3" style="margin-right: 10px;"
-          target="_blank">Download as PDF</a>
-        <a id="downloadLinkWord" href="#" class="btn btn-secondary mt-3" target="_blank">Download as Docx</a>
+        <a id="copyToClipboard" onclick="GPTResearcher.copyToClipboard()" class="btn btn-secondary mt-3" style="margin-right: 10px;">Copy to clipboard (markdown)</a>
+        <a id="downloadLinkMd" href="#" class="btn btn-secondary mt-3" style="margin-right: 10px;" target="_blank" rel="noopener noreferrer">Download as Markdown</a>
+        <a id="downloadLink" href="#" class="btn btn-secondary mt-3" style="margin-right: 10px;" target="_blank" rel="noopener noreferrer">Download as PDF</a>
+        <a id="downloadLinkWord" href="#" class="btn btn-secondary mt-3" style="margin-right: 10px;" target="_blank" rel="noopener noreferrer">Download as Docx</a>
+        <a id="downloadLinkJson" href="#" class="btn btn-secondary mt-3" style="margin-right: 10px;" target="_blank" rel="noopener noreferrer">Download Log</a>
       </div>
     </div>
   </main>
1 change: 1 addition & 0 deletions frontend/nextjs/app/page.tsx
@@ -257,6 +257,7 @@ export default function Home() {
             orderedData={orderedData}
             answer={answer}
             allLogs={allLogs}
+            chatBoxSettings={chatBoxSettings}
             handleClickSuggestion={handleClickSuggestion}
           />
         </div>
[Diffs for the remaining 14 changed files are not rendered here.]
