Add ChromaDB implementation
This commit is contained in:
@@ -14,7 +14,9 @@ from rss_utils import extract_article_url, extract_article_summary, extract_publ
|
||||
from config import Config
|
||||
from ollama_client import OllamaClient
|
||||
from article_clustering import ArticleClusterer
|
||||
from article_clustering import ArticleClusterer
|
||||
from cluster_summarizer import create_cluster_summaries
|
||||
from chroma_client import ChromaClient
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv(dotenv_path='../.env')
|
||||
@@ -38,6 +40,13 @@ ollama_client = OllamaClient(
|
||||
# Article Clusterer placeholder; the real instance is created later, after ollama_client
|
||||
article_clusterer = None
|
||||
|
||||
# Initialize ChromaDB client
|
||||
chroma_client = ChromaClient(
|
||||
host=Config.CHROMA_HOST,
|
||||
port=Config.CHROMA_PORT,
|
||||
collection_name=Config.CHROMA_COLLECTION
|
||||
)
|
||||
|
||||
# Print configuration on startup
|
||||
if __name__ != '__main__':
|
||||
Config.print_config()
|
||||
@@ -440,6 +449,17 @@ def crawl_rss_feed(feed_url, feed_name, feed_category='general', max_articles=10
|
||||
crawled_count += 1
|
||||
print(f" ✓ Saved ({article_data.get('word_count', 0)} words)")
|
||||
|
||||
# Index in ChromaDB
|
||||
try:
|
||||
# Copy the saved article's MongoDB _id (as a string) onto article_doc before indexing it
|
||||
saved_article = articles_collection.find_one({'link': article_url})
|
||||
if saved_article:
|
||||
article_doc['_id'] = str(saved_article['_id'])
|
||||
chroma_client.add_articles([article_doc])
|
||||
except Exception as e:
|
||||
print(f" ⚠ Failed to index in ChromaDB: {e}")
|
||||
|
||||
|
||||
except DuplicateKeyError:
|
||||
print(f" ⚠ Duplicate key error")
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user