This commit is contained in:
2025-11-18 14:45:41 +01:00
parent 2e80d64ff6
commit 84fce9a82c
19 changed files with 2437 additions and 3 deletions

View File

@@ -0,0 +1,221 @@
#!/usr/bin/env python3
"""
Comprehensive test suite for the personalization system.
Tests all 4 phases: keyword extraction, click tracking, interest profiling, and personalization.
"""
import sys
from pymongo import MongoClient
from datetime import datetime
# Import services
from services.tracking_service import create_newsletter_tracking
from services.interest_profiling_service import (
update_user_interests,
get_user_interests,
get_top_interests,
build_interests_from_history
)
from services.personalization_service import (
calculate_article_score,
rank_articles_for_user,
select_personalized_articles,
get_personalization_stats
)
from config import Config
# Shared MongoDB handles used by every test phase in this script.
# The connection URI and database name both come from the project Config.
client = MongoClient(Config.MONGODB_URI)
db = client[Config.DB_NAME]
articles_collection, link_clicks_collection, user_interests_collection = (
    db[collection_name]
    for collection_name in ('articles', 'link_clicks', 'user_interests')
)
def test_phase1_keywords():
    """Phase 1 check: stored articles carry extracted keywords.

    Counts the documents in ``articles_collection`` whose ``keywords`` field
    exists and is not an empty list, then prints one matching article as a
    sample.

    Returns:
        bool: True when at least one such article exists, False otherwise.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Phase 1: Keyword Extraction")
    print(banner)

    # One filter serves both the count and the sample lookup.
    keyword_filter = {'keywords': {'$exists': True, '$ne': []}}
    total = articles_collection.count_documents(keyword_filter)

    if total == 0:
        print("❌ No articles with keywords found")
        print(" Run a crawl first to extract keywords")
        return False

    example = articles_collection.find_one(keyword_filter)
    title_preview = example.get('title', 'N/A')[:50]
    keyword_preview = example.get('keywords', [])[:3]
    print(f"✓ Found {total} articles with keywords")
    print(f" Sample: {title_preview}...")
    print(f" Keywords: {keyword_preview}")
    return True
def test_phase2_tracking():
    """Phase 2: Verify tracking includes keywords and metadata.

    Picks one article that has keywords, calls ``create_newsletter_tracking``
    for a throwaway subscriber e-mail, then looks up the resulting record in
    ``link_clicks_collection`` by its tracking id. The check passes when that
    record has a non-null ``article_id``, a non-null ``category`` and a
    non-empty ``keywords`` value.

    Rows for the throwaway e-mail are deleted from ``link_clicks`` before the
    run and from ``link_clicks`` and ``newsletter_sends`` afterwards. The
    final cleanup runs even if the tracking call or the lookup raises.

    Returns:
        bool: True when the tracking record carries the expected metadata,
        False when no suitable article exists, no tracking record is found,
        or the metadata is incomplete.
    """
    print("\n" + "="*60)
    print("Phase 2: Click Tracking Enhancement")
    print("="*60)

    test_email = 'test-phase2@example.com'

    # Clean up leftovers from a previous (possibly aborted) run
    link_clicks_collection.delete_many({'subscriber_email': test_email})

    # Get article with keywords
    article = articles_collection.find_one({'keywords': {'$exists': True, '$ne': []}})
    if not article:
        print("❌ No articles found")
        return False

    try:
        # Create tracking
        tracking_data = create_newsletter_tracking(
            newsletter_id='test-phase2',
            subscriber_email=test_email,
            article_links=[{
                'url': article['link'],
                'title': article.get('title', ''),
            }],
        )

        # Verify tracking record. The map may be empty and find_one returns
        # None when nothing matches; both cases are reported as a failure
        # below instead of crashing here.
        tracking_ids = list(tracking_data['link_tracking_map'].values())
        tracking_record = None
        if tracking_ids:
            tracking_record = link_clicks_collection.find_one(
                {'tracking_id': tracking_ids[0]}
            )
    finally:
        # Clean up, even when the calls above raised
        link_clicks_collection.delete_many({'subscriber_email': test_email})
        db['newsletter_sends'].delete_many({'subscriber_email': test_email})

    has_metadata = (
        tracking_record is not None
        and tracking_record.get('article_id') is not None
        and tracking_record.get('category') is not None
        # bool() also covers a stored null, which len() would choke on
        and bool(tracking_record.get('keywords'))
    )

    if not has_metadata:
        print("❌ Tracking records missing metadata")
        return False

    print("✓ Tracking records include metadata")
    print(f" Article ID: {tracking_record.get('article_id')}")
    print(f" Category: {tracking_record.get('category')}")
    print(f" Keywords: {len(tracking_record.get('keywords', []))} keywords")
    return True
def test_phase3_profiling():
    """Phase 3: Verify interest profiles are built from clicks.

    Records two interest updates for a throwaway e-mail through
    ``update_user_interests`` and reads the profile back with
    ``get_user_interests``. The check passes when the profile exists and its
    ``total_clicks`` equals 2.

    The throwaway profile is deleted from ``user_interests_collection``
    before the run and again afterwards. The final delete runs even if one of
    the service calls raises.

    Returns:
        bool: True when the profile was created with two clicks, False
        otherwise.
    """
    print("\n" + "="*60)
    print("Phase 3: User Interest Profiling")
    print("="*60)

    test_email = 'test-phase3@example.com'

    # Clean up leftovers from a previous (possibly aborted) run
    user_interests_collection.delete_many({'email': test_email})

    try:
        # Create profile
        update_user_interests(test_email, ['Bayern Munich', 'Football'], 'sports')
        update_user_interests(test_email, ['Transportation', 'Munich'], 'local')

        # Verify profile
        profile = get_user_interests(test_email)
    finally:
        # Clean up, even when a service call raised
        user_interests_collection.delete_many({'email': test_email})

    # .get() so a profile without the counter is reported as a failure
    # rather than raising KeyError.
    if not profile or profile.get('total_clicks') != 2:
        print("❌ Interest profile not created correctly")
        return False

    print("✓ Interest profile created")
    print(f" Total clicks: {profile['total_clicks']}")
    print(f" Categories: {len(profile.get('categories', {}))}")
    print(f" Keywords: {len(profile.get('keywords', {}))}")
    return True
def test_phase4_personalization():
    """Phase 4: Verify articles are ranked by user interests.

    Loads up to five articles that have keywords (at least three are
    required), creates a throwaway interest profile, then calls
    ``rank_articles_for_user`` and ``select_personalized_articles`` with
    ``max_articles=3``. The check passes when at least one article is
    selected and every selected article has a ``personalization_score`` key.

    The throwaway profile is deleted from ``user_interests_collection``
    before the run and again afterwards. The final delete runs even if
    ranking or selection raises.

    Returns:
        bool: True when articles were selected and all carry a score, False
        otherwise.
    """
    print("\n" + "="*60)
    print("Phase 4: Personalized Newsletter Generation")
    print("="*60)

    test_email = 'test-phase4@example.com'

    # Clean up leftovers from a previous (possibly aborted) run
    user_interests_collection.delete_many({'email': test_email})

    # Get articles
    articles = list(articles_collection.find(
        {'keywords': {'$exists': True, '$ne': []}},
        limit=5
    ))
    if len(articles) < 3:
        print("❌ Not enough articles found")
        return False

    try:
        # Create profile
        update_user_interests(test_email, ['Bayern Munich', 'Football'], 'sports')

        # Rank articles
        ranked = rank_articles_for_user(articles, test_email)

        # Select personalized
        selected = select_personalized_articles(articles, test_email, max_articles=3)
    finally:
        # Clean up, even when ranking/selection raised
        user_interests_collection.delete_many({'email': test_email})

    # An empty (or missing) selection is a failure, not a crash.
    if not selected or not all('personalization_score' in a for a in selected):
        print("❌ Personalization failed")
        return False

    print("✓ Articles ranked and selected")
    print(f" Total ranked: {len(ranked)}")
    print(f" Selected: {len(selected)}")
    print(f" Top score: {selected[0].get('personalization_score', 0):.3f}")
    return True
def main():
    """Run the four phase checks in order and print a pass/fail summary.

    Returns:
        int: 0 when every phase passed, 1 when at least one failed. The
        value is meant to be used as the process exit status.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("PERSONALIZATION SYSTEM TEST SUITE")
    print(divider)

    # (label, check) pairs; every check runs regardless of earlier results.
    phases = (
        ('Phase 1: Keyword Extraction', test_phase1_keywords),
        ('Phase 2: Click Tracking', test_phase2_tracking),
        ('Phase 3: Interest Profiling', test_phase3_profiling),
        ('Phase 4: Personalization', test_phase4_personalization),
    )
    results = {label: check() for label, check in phases}

    print("\n" + divider)
    print("TEST RESULTS")
    print(divider)
    for phase, passed in results.items():
        print(f"{'✅ PASS' if passed else '❌ FAIL'} - {phase}")

    if not all(results.values()):
        print("\n❌ Some tests FAILED")
        return 1

    print("\n🎉 All personalization tests PASSED!")
    return 0
# Script entry point: the suite result becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())