#!/usr/bin/env python3
"""
Test neutral summary generation from clustered articles.

Loads the test articles (links starting with https://example.com/) from
MongoDB, groups them by ``cluster_id``, asks ``ClusterSummarizer`` for a
neutral summary of every cluster that contains more than one article, prints
the result and upserts it into the ``cluster_summaries`` collection.

The MongoDB connection string can be overridden with the ``MONGODB_URI``
environment variable; the built-in default is used otherwise.
"""
import os
import sys
import textwrap
from collections import defaultdict
from datetime import datetime, timezone

# Fallback connection string, used when MONGODB_URI is not set.
DEFAULT_MONGODB_URI = "mongodb://admin:changeme@mongodb:27017/"
DATABASE_NAME = "munich_news"
# Directory that holds ollama_client, cluster_summarizer and config.
APP_PATH = "/app"

SUMMARY_WIDTH = 66        # characters of summary text per printed line
TITLE_PREVIEW_CHARS = 60  # characters of a title shown in the article list
RULE = "=" * 70


def group_by_cluster(articles):
    """Group article documents by their ``cluster_id``.

    Articles without a ``cluster_id`` (missing key or ``None``) are skipped
    instead of raising ``KeyError``, since they belong to no cluster.

    Args:
        articles: iterable of article dicts.

    Returns:
        dict mapping cluster id -> list of articles, in first-seen order.
    """
    grouped = defaultdict(list)
    for article in articles:
        cluster_id = article.get("cluster_id")
        if cluster_id is None:
            continue
        grouped[cluster_id].append(article)
    return dict(grouped)


def wrap_summary(text, width=SUMMARY_WIDTH, indent=" "):
    """Word-wrap ``text`` into indented lines for console output.

    Whitespace runs are collapsed to single spaces. Words are never split,
    so a word longer than ``width`` is placed on a line of its own.

    Args:
        text: the summary text to wrap.
        width: maximum number of text characters per line (indent excluded).
        indent: prefix put in front of every line.

    Returns:
        list of wrapped lines; empty when ``text`` has no words.
    """
    normalized = " ".join(text.split())
    return textwrap.wrap(
        normalized,
        width=width + len(indent),
        initial_indent=indent,
        subsequent_indent=indent,
        break_long_words=False,
        break_on_hyphens=False,
    )


def preview_title(title, limit=TITLE_PREVIEW_CHARS):
    """Return ``title`` cut to ``limit`` characters.

    The ``...`` marker is appended only when the title was actually cut.
    A missing title (``None``) yields an empty string.
    """
    title = title or ""
    if len(title) <= limit:
        return title
    return title[:limit] + "..."


def save_cluster_summary(db, cluster_id, result):
    """Upsert the generated summary for ``cluster_id``.

    ``created_at`` is written only when the document is first inserted so
    that re-running the script does not reset it; ``updated_at`` is
    refreshed on every run.
    """
    now = datetime.now(timezone.utc)
    db.cluster_summaries.update_one(
        {"cluster_id": cluster_id},
        {
            "$set": {
                "cluster_id": cluster_id,
                "neutral_summary": result['neutral_summary'],
                "sources": result['sources'],
                "article_count": result['article_count'],
                "updated_at": now,
            },
            "$setOnInsert": {"created_at": now},
        },
        upsert=True,
    )


def main():
    """Run the neutral-summary test.

    Returns:
        Process exit code: 1 when there are no test articles or no cluster
        with more than one article, 0 otherwise.
    """
    # Imported here so the pure helpers above stay importable on machines
    # without the MongoDB driver.
    from pymongo import MongoClient

    mongodb_uri = os.environ.get("MONGODB_URI", DEFAULT_MONGODB_URI)

    # The context manager closes the client on every exit path.
    with MongoClient(mongodb_uri) as client:
        db = client[DATABASE_NAME]

        print("Testing Neutral Summary Generation")
        print(RULE)
        print()

        # Check for test articles
        test_articles = list(
            db.articles.find(
                {"link": {"$regex": "^https://example.com/"}}
            ).sort("_id", 1)
        )
        if not test_articles:
            print("⚠ No test articles found. Run test-clustering-real.py first.")
            return 1

        print(f"Found {len(test_articles)} test articles")
        print()

        # Find clusters with multiple articles
        multi_article_clusters = {
            cluster_id: members
            for cluster_id, members in group_by_cluster(test_articles).items()
            if len(members) > 1
        }
        if not multi_article_clusters:
            print("⚠ No clusters with multiple articles found")
            return 1

        print(f"Found {len(multi_article_clusters)} cluster(s) with multiple articles")
        print()

        # The project modules live in APP_PATH, so it must be on sys.path
        # before they can be imported.
        sys.path.insert(0, APP_PATH)
        from ollama_client import OllamaClient
        from cluster_summarizer import ClusterSummarizer
        from config import Config

        ollama_client = OllamaClient(
            base_url=Config.OLLAMA_BASE_URL,
            model=Config.OLLAMA_MODEL,
            enabled=Config.OLLAMA_ENABLED,
            timeout=60,
        )
        summarizer = ClusterSummarizer(ollama_client, max_words=200)

        print("Generating neutral summaries...")
        print(RULE)
        print()

        for cluster_id, articles in multi_article_clusters.items():
            print(f"Cluster: {cluster_id}")
            print(f"Articles: {len(articles)}")
            print()

            # Show individual articles
            for i, article in enumerate(articles, 1):
                source = article.get('source', 'unknown')
                title = preview_title(article.get('title'))
                print(f" {i}. [{source}] {title}")
            print()

            # Generate neutral summary
            print(" Generating neutral summary...")
            result = summarizer.generate_neutral_summary(articles)

            if result['success']:
                print(f" ✓ Success ({result['duration']:.1f}s)")
                print()
                print(" Neutral Summary:")
                print(" " + "-" * SUMMARY_WIDTH)
                print("\n".join(wrap_summary(result['neutral_summary'])))
                print(" " + "-" * SUMMARY_WIDTH)
                print()

                # Save to database
                save_cluster_summary(db, cluster_id, result)
                print(" ✓ Saved to cluster_summaries collection")
            else:
                print(f" ✗ Failed: {result['error']}")

            print()
            print(RULE)
            print()

        print("Testing complete!")
        print()

        # Show summary statistics
        total_cluster_summaries = db.cluster_summaries.count_documents({})
        print(f"Total cluster summaries in database: {total_cluster_summaries}")

    return 0


if __name__ == "__main__":
    sys.exit(main())