Commit

trends processing more structured (best, worst)
josancamon19 committed Sep 17, 2024
1 parent 16deb1a commit f72d3c4
Showing 4 changed files with 238 additions and 32 deletions.
33 changes: 20 additions & 13 deletions backend/database/trends.py
@@ -14,17 +14,20 @@ def get_trends_data() -> List[Dict]:
    trends_docs = [doc for doc in trends_ref.stream(retry=Retry())]
    trends_data = []
    for category in trends_docs:
        category_data = category.to_dict()

        category_topics_ref = trends_ref.document(category_data['id']).collection('topics')
        topics_docs = [topic.to_dict() for topic in category_topics_ref.stream(retry=Retry())]
        topics = sorted(topics_docs, key=lambda e: len(e['memory_ids']), reverse=True)
        for topic in topics:
            topic['memories_count'] = len(topic['memory_ids'])
            del topic['memory_ids']

        category_data['topics'] = topics
        trends_data.append(category_data)
        try:
            category_data = category.to_dict()

            category_topics_ref = trends_ref.document(category_data['id']).collection('topics')
            topics_docs = [topic.to_dict() for topic in category_topics_ref.stream(retry=Retry())]
            topics = sorted(topics_docs, key=lambda e: len(e['memory_ids']), reverse=True)
            for topic in topics:
                topic['memories_count'] = len(topic['memory_ids'])
                del topic['memory_ids']

            category_data['topics'] = topics
            trends_data.append(category_data)
        except Exception as e:
            continue
    return trends_data


@@ -34,10 +34,14 @@ def save_trends(memory: Memory, trends: List[Trend]):
    for trend in trends:
        category = trend.category.value
        topics = trend.topics
        category_id = document_id_from_seed(category)
        trend_type = trend.type.value
        category_id = document_id_from_seed(category + trend_type)
        category_doc_ref = trends_coll_ref.document(category_id)

        category_doc_ref.set({"id": category_id, "category": category, "created_at": datetime.utcnow()}, merge=True)
        category_doc_ref.set(
            {"id": category_id, "category": category, "type": trend_type, "created_at": datetime.utcnow()},
            merge=True
        )

        topics_coll_ref = category_doc_ref.collection('topics')

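The key change in `save_trends` is that the Firestore category document ID is now seeded with both the category and the trend type, so "best" and "worst" trends of the same category land in separate documents instead of overwriting each other. A minimal sketch of the idea, assuming `document_id_from_seed` derives a deterministic ID from its seed string (the real helper in `backend/utils` may use a different scheme):

```python
import hashlib


def document_id_from_seed(seed: str) -> str:
    # Assumed behaviour: a deterministic id derived from the seed string.
    # Illustrative only; the repository's helper may differ.
    return hashlib.md5(seed.encode()).hexdigest()


# Before this commit, both sentiments of a category mapped to the same document:
old_id = document_id_from_seed("company")

# After it, each (category, type) pair gets its own category document:
best_id = document_id_from_seed("company" + "best")
worst_id = document_id_from_seed("company" + "worst")
assert best_id != worst_id
```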
213 changes: 203 additions & 10 deletions backend/models/trend.py
@@ -5,20 +5,213 @@


class TrendEnum(str, Enum):
    acquisition = "acquisition"
    ceo = "ceo"
    company = "company"
    event = "event"
    founder = "founder"
    industry = "industry"
    innovation = "innovation"
    investment = "investment"
    partnership = "partnership"
    product = "product"
    research = "research"
    tool = "tool"
    software_product = "software_product"
    hardware_product = "hardware_product"
    ai_product = "ai_product"


class TrendType(str, Enum):
    best = "best"
    worst = "worst"


class Trend(BaseModel):
    category: TrendEnum = Field(description="The category identified")
    type: TrendType = Field(description="The type of trend identified")
    topics: List[str] = Field(description="The specific topics corresponding to the category")


ceo_options = [
"Elon Musk",
"Sundar Pichai",
"Satya Nadella",
"Jensen Huang",
"Andy Jassy",
"Ryan Breslow",
"Henrique Dubugras",
"Alexandr Wang",
"Tim Cook",
"Marc Benioff",
"Dylan Field",
"Parag Agrawal",
"Brian Chesky",
"Patrick Collison",
"Andrew Wilson",
"Lisa Su",
"Austin Russell",
"Sam Altman",
"Darius Adamczyk",
"Shantanu Narayen",
"Bob Chapek",
"Mark Zuckerberg",
"David Zaslav",
"Mary Barra",
"Howard Schultz",
"Raj Subramaniam",
"Arvind Krishna",
"Adam Neumann",
"Vlad Tenev",
"Dara Khosrowshahi",
"Fran Horowitz",
"Yuanqing Yang",
"Frank Slootman",
"William McDermott",
"Anthony Wood",
"Roland Busch",
"Christian Klein",
"Kazuhiro Tsuga",
"Stéphane Bancel"
]

company_options = [
"Microsoft",
"Nvidia",
"Amazon",
"Apple",
"Tesla",
"Salesforce",
"Shopify",
"Google/Alphabet",
"SpaceX",
"OpenAI",
"Brex",
"Stripe",
"Adobe",
"Zoom",
"Figma",
"Databricks",
"GitHub",
"Luminar",
"Airbnb",
"Square",
"Meta",
"Warner Bros. Discovery",
"Disney",
"X (formerly Twitter)",
"BP",
"Robinhood",
"Peloton",
"Boeing",
"WeWork",
"FedEx",
"AT&T",
"IBM",
"Frontier Airlines",
"Uber",
"Juul",
"TikTok",
"Snapchat",
"Nestlé",
"Facebook",
"GameStop"
]

software_product_options = [
"Microsoft Copilot",
"OpenAI GPT-4",
"Slack",
"Google Workspace",
"Zoom",
"Salesforce CRM",
"Adobe Photoshop",
"Figma",
"Datadog",
"ServiceNow",
"HubSpot",
"Notion",
"Tableau",
"Monday.com",
"GitHub Copilot",
"Asana",
"Trello",
"Snowflake",
"Atlassian Jira",
"ZoomInfo",
"Meta Horizon Worlds",
"Robinhood",
"Oracle",
"Evernote",
"Google Stadia",
"Facebook Workplace",
"SAP S/4HANA",
"IBM Watson",
"Quibi",
"Kaspersky",
"Palantir",
"Clubhouse",
"Slack Threads",
"TikTok’s Creator Tools",
"Samsung Bixby",
"Salesforce Marketing Cloud",
"Microsoft Teams",
"Intel AI Suite",
"Uber Driver App"
]

hardware_product_options = [
"Tesla Cybertruck",
"iPhone 16",
"MacBook Pro",
"Nvidia RTX 5090",
"SpaceX Starship",
"Amazon Echo",
"Sony PlayStation 6",
"Microsoft Surface Pro 10",
"Dyson V16",
"Luminar LiDAR",
"DJI Mavic 3",
"Apple Vision Pro",
"Google Pixel 9",
"Framework Laptop",
"Oculus Quest 4",
"Logitech G Pro X",
"Samsung Galaxy Fold 4",
"Fitbit Charge 6",
"Apple Watch 9",
"Lumix S5 II"
]

ai_product_options = [
"OpenAI GPT-5",
"Google DeepMind",
"Nvidia Omniverse",
"Microsoft Copilot",
"Tesla FSD",
"Amazon Alexa",
"Salesforce Einstein",
"Palantir Foundry",
"Scale AI",
"Grammarly AI",
"MidJourney",
"Hugging Face",
"Runway Gen-2",
"Anthropic Claude 2",
"Cohere",
"Databricks AI",
"Hugging Face Transformers",
"Notion AI",
"Synthesia",
"Jasper AI",
"Meta AI",
"IBM Watson",
"Clearview AI",
"Facebook AI",
"Google Duplex",
"Samsung Bixby",
"Twitter AI moderation",
"Microsoft Tay",
"Replika",
"Clear AI",
"TikTok AI",
"Robinhood AI Trading",
"Meta Horizon AI",
"Ring AI",
"Uber AI",
"Watson Health",
"ChatGPT clones",
"Tinder AI",
"Zoom AI transcription",
"Salesforce AI"
]
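With the new `TrendType` enum and the `type` field on `Trend`, an extracted topic now carries its sentiment alongside its category. A quick illustration of constructing the updated model, using the field names shown in the diff above (the serialization call depends on the pydantic version in use):

```python
from models.trend import Trend, TrendEnum, TrendType

# A positively perceived mention of a company, as the extractor now emits it.
trend = Trend(
    category=TrendEnum.company,
    type=TrendType.best,
    topics=["Tesla"],
)

print(trend.dict())  # .model_dump() on pydantic v2
```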
22 changes: 14 additions & 8 deletions backend/utils/llm.py
@@ -12,7 +12,8 @@
from models.memory import Structured, MemoryPhoto, CategoryEnum, Memory
from models.plugin import Plugin
from models.transcript_segment import TranscriptSegment
from models.trend import TrendEnum
from models.trend import TrendEnum, ceo_options, company_options, software_product_options, hardware_product_options, \
ai_product_options, TrendType
from utils.memories.facts import get_prompt_facts

llm_mini = ChatOpenAI(model='gpt-4o-mini')
@@ -472,6 +473,7 @@ def new_facts_extractor(uid: str, segments: List[TranscriptSegment]) -> List[Fac

class Item(BaseModel):
    category: TrendEnum = Field(description="The category identified")
    type: TrendType = Field(description="The sentiment identified")
    topic: str = Field(description="The specific topic corresponding to the category")


@@ -487,16 +489,20 @@ def trends_extractor(memory: Memory) -> List[Item]:
    prompt = f'''
You will be given a finished conversation transcript.
You are responsible for extracting the topics of the conversation and classifying each one within one of the following categories: {str([e.value for e in TrendEnum]).strip("[]")}.
You must identify if the perception is positive or negative, and classify it as "best" or "worst".
Each topic must be a person, company, event, technology, product, research, innovation, acquisition, partnership, investment, founder, CEO, industry, or any other relevant topic.
It can't be a non-specific topic like "the weather" or "the economy".
For the specific topics, here are the options available; you must classify each topic as one of these options:
- ceo_options: {", ".join(ceo_options)}
- company_options: {", ".join(company_options)}
- software_product_options: {", ".join(software_product_options)}
- hardware_product_options: {", ".join(hardware_product_options)}
- ai_product_options: {", ".join(ai_product_options)}
For example,
If you identify the topic "Tesla", you should classify it as "company".
If you identify the topic "Elon Musk", you should classify it as "ceo".
If you identify the topic "Dreamforce", you should classify it as "event".
If you identify the topic "GPT O1", you should classify it as "tool".
If you identify the topic "Tesla stock has been going up incredibly", you should output:
- Category: company
- Type: best
- Topic: Tesla
Conversation:
{transcript}
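The body of `trends_extractor` is truncated in this diff, but the `Item` model above indicates the prompt is parsed into structured output. A hypothetical sketch of how the call could look with LangChain's `with_structured_output` — the `ExpectedOutput` wrapper and the prompt wiring are assumptions for illustration, not the repository's actual code:

```python
from typing import List

from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


class ExpectedOutput(BaseModel):
    # Hypothetical wrapper around the Item model defined above in utils/llm.py.
    items: List[Item] = Field(default_factory=list, description="Topics with category and sentiment")


def extract_trends_from_transcript(transcript: str) -> List[Item]:
    # Sketch only: build the prompt shown above with the transcript interpolated,
    # then ask the model for output matching ExpectedOutput.
    prompt = f"...prompt from trends_extractor...\nConversation:\n{transcript}"
    llm = ChatOpenAI(model='gpt-4o-mini')
    response: ExpectedOutput = llm.with_structured_output(ExpectedOutput).invoke(prompt)
    return response.items
```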
2 changes: 1 addition & 1 deletion backend/utils/memories/process_memory.py
@@ -128,7 +128,7 @@ def _extract_facts(uid: str, memory: Memory):

def _extract_trends(memory: Memory):
    extracted_items = trends_extractor(memory)
    parsed = [Trend(category=item.category, topics=[item.topic]) for item in extracted_items]
    parsed = [Trend(category=item.category, topics=[item.topic], type=item.type) for item in extracted_items]
    trends_db.save_trends(memory, parsed)


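Downstream, `get_trends_data` (first file above) now returns one entry per (category, type) pair, with the new `type` field on each category document. An illustrative shape — the field values and the extra topic fields are made up for the example:

```python
# Hypothetical example of what get_trends_data() could return after this commit.
trends_data = [
    {
        "id": "<document_id_from_seed('company' + 'best')>",
        "category": "company",
        "type": "best",
        "created_at": "2024-09-17T00:00:00Z",
        "topics": [
            # each topic document keeps its fields, minus 'memory_ids',
            # plus the derived 'memories_count'
            {"topic": "Tesla", "memories_count": 3},
        ],
    },
]
```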
