This commit is contained in:
2025-11-11 14:09:21 +01:00
parent bcd0a10576
commit 1075a91eac
57 changed files with 5598 additions and 1366 deletions

View File

@@ -0,0 +1,451 @@
#!/usr/bin/env python
"""
Test analytics functionality for email tracking
Run from backend directory with venv activated:
cd backend
source venv/bin/activate # or venv\\Scripts\\activate on Windows
python test_analytics.py
"""
import sys
import os
from datetime import datetime, timedelta
# Add backend directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from services.analytics_service import (
get_open_rate,
get_click_rate,
get_newsletter_metrics,
get_article_performance,
get_subscriber_activity_status,
update_subscriber_activity_statuses
)
from database import (
newsletter_sends_collection,
link_clicks_collection,
subscriber_activity_collection
)
from app import app
print("\n" + "="*80)
print("Analytics Service Tests")
print("="*80)
# Test counters
tests_passed = 0
tests_failed = 0
def test_result(test_name, passed, message=""):
"""Print test result"""
global tests_passed, tests_failed
if passed:
tests_passed += 1
print(f"{test_name}")
if message:
print(f" {message}")
else:
tests_failed += 1
print(f"{test_name}")
if message:
print(f" {message}")
# Setup test data
print("\n" + "-" * 80)
print("Setting up test data...")
print("-" * 80)

try:
    # Remove leftovers from earlier runs; every fixture written by this
    # script carries the "test-analytics-" prefix.
    fixture_pattern = '^test-analytics-'
    newsletter_sends_collection.delete_many({'newsletter_id': {'$regex': fixture_pattern}})
    link_clicks_collection.delete_many({'newsletter_id': {'$regex': fixture_pattern}})
    subscriber_activity_collection.delete_many({'email': {'$regex': fixture_pattern}})

    # Ten newsletter sends: indexes 0-6 are opened, 7-9 are not.
    test_newsletter_id = 'test-analytics-newsletter-001'
    for index in range(10):
        was_opened = index < 7
        newsletter_sends_collection.insert_one({
            'newsletter_id': test_newsletter_id,
            'subscriber_email': f'test-analytics-user{index}@example.com',
            'tracking_id': f'test-pixel-{index}',
            'sent_at': datetime.utcnow(),
            'opened': was_opened,
            'first_opened_at': datetime.utcnow() if was_opened else None,
            'last_opened_at': datetime.utcnow() if was_opened else None,
            'open_count': int(was_opened),
            'created_at': datetime.utcnow(),
        })

    # Ten link-tracking records for one article: indexes 0-3 are clicked.
    test_article_url = 'https://example.com/test-analytics-article'
    for index in range(10):
        was_clicked = index < 4
        link_clicks_collection.insert_one({
            'tracking_id': f'test-link-{index}',
            'newsletter_id': test_newsletter_id,
            'subscriber_email': f'test-analytics-user{index}@example.com',
            'article_url': test_article_url,
            'article_title': 'Test Analytics Article',
            'clicked': was_clicked,
            'clicked_at': datetime.utcnow() if was_clicked else None,
            'user_agent': 'Test Agent' if was_clicked else None,
            'created_at': datetime.utcnow(),
        })

    print("✓ Test data created")
except Exception as exc:
    # Without fixtures none of the later checks are meaningful, so abort.
    print(f"❌ Error setting up test data: {exc!s}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
# Test 1: Open Rate Calculation
print("\n" + "-" * 80)
print("Test 1: Open Rate Calculation")
print("-" * 80)

try:
    # The fixture newsletter has 7 opened sends out of 10.
    measured_rate = get_open_rate(test_newsletter_id)
    test_result(
        "Calculate open rate",
        measured_rate == 70.0,
        f"Open rate: {measured_rate}% (expected 70%)",
    )

    # A newsletter id with no sends is expected to report 0.0.
    missing_rate = get_open_rate('non-existent-newsletter')
    test_result(
        "Handle non-existent newsletter",
        missing_rate == 0.0,
        f"Open rate: {missing_rate}% (expected 0%)",
    )
except Exception as exc:
    test_result("Open rate calculation", False, f"Error: {exc!s}")
    import traceback
    traceback.print_exc()
# Test 2: Click Rate Calculation
print("\n" + "-" * 80)
print("Test 2: Click Rate Calculation")
print("-" * 80)

try:
    # The fixture article has 4 clicked link records out of 10.
    measured_rate = get_click_rate(test_article_url)
    test_result(
        "Calculate click rate",
        measured_rate == 40.0,
        f"Click rate: {measured_rate}% (expected 40%)",
    )

    # An article URL with no tracking records is expected to report 0.0.
    missing_rate = get_click_rate('https://example.com/non-existent')
    test_result(
        "Handle non-existent article",
        missing_rate == 0.0,
        f"Click rate: {missing_rate}% (expected 0%)",
    )
except Exception as exc:
    test_result("Click rate calculation", False, f"Error: {exc!s}")
    import traceback
    traceback.print_exc()
# Test 3: Newsletter Metrics
print("\n" + "-" * 80)
print("Test 3: Newsletter Metrics")
print("-" * 80)

try:
    metrics = get_newsletter_metrics(test_newsletter_id)

    # Every one of these keys must be present in the returned mapping.
    required_keys = (
        'newsletter_id', 'total_sent', 'total_opened', 'open_rate',
        'total_clicks', 'unique_clickers', 'click_through_rate',
    )
    absent_keys = [key for key in required_keys if key not in metrics]
    test_result("Returns all required fields", not absent_keys)

    # (label, metrics key, expected value, detail template), checked in order.
    expectations = [
        ("Correct total_sent", 'total_sent', 10, "Total sent: {}"),
        ("Correct total_opened", 'total_opened', 7, "Total opened: {}"),
        ("Correct open_rate", 'open_rate', 70.0, "Open rate: {}%"),
        ("Correct total_clicks", 'total_clicks', 4, "Total clicks: {}"),
        ("Correct unique_clickers", 'unique_clickers', 4, "Unique clickers: {}"),
        ("Correct click_through_rate", 'click_through_rate', 40.0, "CTR: {}%"),
    ]
    for label, key, expected, template in expectations:
        actual = metrics[key]
        test_result(label, actual == expected, template.format(actual))
except Exception as exc:
    test_result("Newsletter metrics", False, f"Error: {exc!s}")
    import traceback
    traceback.print_exc()
# Test 4: Article Performance
print("\n" + "-" * 80)
print("Test 4: Article Performance")
print("-" * 80)

try:
    performance = get_article_performance(test_article_url)

    # Every one of these keys must be present in the returned mapping.
    required_keys = (
        'article_url', 'total_sent', 'total_clicks', 'click_rate',
        'unique_clickers', 'newsletters',
    )
    absent_keys = [key for key in required_keys if key not in performance]
    test_result("Returns all required fields", not absent_keys)

    # (label, performance key, expected value, detail template), checked in order.
    expectations = [
        ("Correct total_sent", 'total_sent', 10, "Total sent: {}"),
        ("Correct total_clicks", 'total_clicks', 4, "Total clicks: {}"),
        ("Correct click_rate", 'click_rate', 40.0, "Click rate: {}%"),
        ("Correct unique_clickers", 'unique_clickers', 4, "Unique clickers: {}"),
    ]
    for label, key, expected, template in expectations:
        actual = performance[key]
        test_result(label, actual == expected, template.format(actual))

    # The article must be attributed to at least one newsletter.
    newsletters = performance['newsletters']
    test_result(
        "Returns newsletter list",
        len(newsletters) > 0,
        f"Newsletters: {newsletters}",
    )
except Exception as exc:
    test_result("Article performance", False, f"Error: {exc!s}")
    import traceback
    traceback.print_exc()
# Test 5: Activity Status Classification
print("\n" + "-" * 80)
print("Test 5: Activity Status Classification")
print("-" * 80)

try:
    now = datetime.utcnow()

    # One send per expected status: (status, age of the send in days, opened?).
    # The status name doubles as the slug in the email and tracking id.
    status_fixtures = [
        ('active', 10, True),
        ('inactive', 45, True),
        ('dormant', 90, True),
        ('new', 5, False),  # never opened
    ]
    for status, age_days, was_opened in status_fixtures:
        moment = now - timedelta(days=age_days)
        newsletter_sends_collection.insert_one({
            'newsletter_id': 'test-analytics-activity',
            'subscriber_email': f'test-analytics-{status}@example.com',
            'tracking_id': f'test-{status}-pixel',
            'sent_at': moment,
            'opened': was_opened,
            'first_opened_at': moment if was_opened else None,
            'last_opened_at': moment if was_opened else None,
            'open_count': 1 if was_opened else 0,
            'created_at': moment,
        })

    # Classify only after all fixtures exist, in the same order as inserted.
    for status, _age_days, _was_opened in status_fixtures:
        reported = get_subscriber_activity_status(f'test-analytics-{status}@example.com')
        test_result(f"Classify {status} user", reported == status, f"Status: {reported}")
except Exception as exc:
    test_result("Activity status classification", False, f"Error: {exc!s}")
    import traceback
    traceback.print_exc()
# Test 6: Batch Update Activity Statuses
print("\n" + "-" * 80)
print("Test 6: Batch Update Activity Statuses")
print("-" * 80)

try:
    # The batch job should touch at least the fixture subscribers.
    updated_count = update_subscriber_activity_statuses()
    test_result(
        "Updates subscriber records",
        updated_count > 0,
        f"Updated {updated_count} subscribers",
    )

    # The batch run is expected to have written a record for the
    # "active" fixture subscriber.
    activity_record = subscriber_activity_collection.find_one(
        {'email': 'test-analytics-active@example.com'}
    )
    test_result("Creates activity record", activity_record is not None)

    if activity_record:
        expected_fields = (
            'email', 'status', 'total_opens', 'total_clicks',
            'newsletters_received', 'newsletters_opened', 'updated_at',
        )
        missing_fields = [name for name in expected_fields if name not in activity_record]
        test_result("Activity record has required fields", not missing_fields)

        stored_status = activity_record['status']
        test_result(
            "Activity record has correct status",
            stored_status == 'active',
            f"Status: {stored_status}",
        )
except Exception as exc:
    test_result("Batch update activity statuses", False, f"Error: {exc!s}")
    import traceback
    traceback.print_exc()
# Test 7: Analytics API Endpoints
print("\n" + "-" * 80)
print("Test 7: Analytics API Endpoints")
print("-" * 80)

try:
    with app.test_client() as client:
        # (request method, path, status label, key expected in the JSON body,
        # body label), exercised in this order.
        # NOTE(review): the article URL is interpolated into the path
        # unescaped; confirm the route is declared to accept slashes.
        endpoint_checks = [
            (client.get, f'/api/analytics/newsletter/{test_newsletter_id}',
             "Newsletter endpoint returns 200", 'open_rate',
             "Newsletter endpoint returns data"),
            (client.get, f'/api/analytics/article/{test_article_url}',
             "Article endpoint returns 200", 'click_rate',
             "Article endpoint returns data"),
            (client.get, '/api/analytics/subscriber/test-analytics-active@example.com',
             "Subscriber endpoint returns 200", 'status',
             "Subscriber endpoint returns data"),
            (client.post, '/api/analytics/update-activity',
             "Update activity endpoint returns 200", 'updated_count',
             "Update activity endpoint returns count"),
        ]
        for send, path, status_label, expected_key, body_label in endpoint_checks:
            response = send(path)
            succeeded = response.status_code == 200
            test_result(status_label, succeeded, f"Status: {response.status_code}")
            if not succeeded:
                # The body is only inspected for successful responses.
                continue
            body = response.get_json()
            test_result(body_label, body is not None and expected_key in body)
except Exception as exc:
    test_result("Analytics API endpoints", False, f"Error: {exc!s}")
    import traceback
    traceback.print_exc()
# Clean up test data
print("\n" + "-" * 80)
print("Cleaning up test data...")
print("-" * 80)

try:
    # Same prefix-based filters the setup step uses, so only fixtures
    # written by this script are removed.
    fixture_pattern = '^test-analytics-'
    for collection in (newsletter_sends_collection, link_clicks_collection):
        collection.delete_many({'newsletter_id': {'$regex': fixture_pattern}})
    subscriber_activity_collection.delete_many({'email': {'$regex': fixture_pattern}})
    print("✓ Test data cleaned up")
except Exception as exc:
    # Best effort: a failed cleanup is reported but does not fail the run.
    print(f"⚠ Error cleaning up: {exc!s}")
# Summary
total_checks = tests_passed + tests_failed
print("\n" + "=" * 80)
print("TEST SUMMARY")
print("=" * 80)
print(f"Total tests: {total_checks}")
print(f"✓ Passed: {tests_passed}")
print(f"❌ Failed: {tests_failed}")
if tests_failed:
    print(f"\n{tests_failed} test(s) failed")
else:
    print("\n🎉 All tests passed!")
print("=" * 80 + "\n")

# Exit status 0 only when every check passed, so callers can gate on it.
sys.exit(1 if tests_failed else 0)

View File

@@ -0,0 +1,389 @@
#!/usr/bin/env python
"""
Test privacy compliance features for email tracking
Run from backend directory with venv activated:
cd backend
source venv/bin/activate # or venv\\Scripts\\activate on Windows
python test_privacy.py
"""
import sys
import os
from datetime import datetime, timedelta
from pymongo import MongoClient
# Add backend directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from config import Config
from services.tracking_service import (
create_newsletter_tracking,
anonymize_old_tracking_data,
delete_subscriber_tracking_data
)
from database import (
newsletter_sends_collection,
link_clicks_collection,
subscriber_activity_collection,
subscribers_collection
)
from app import app
print("\n" + "="*80)
print("Privacy Compliance Tests")
print("="*80)
# Test counters
tests_passed = 0
tests_failed = 0
def test_result(test_name, passed, message=""):
"""Print test result"""
global tests_passed, tests_failed
if passed:
tests_passed += 1
print(f"{test_name}")
if message:
print(f" {message}")
else:
tests_failed += 1
print(f"{test_name}")
if message:
print(f" {message}")
# Setup: Clean up test data
print("\n" + "-" * 80)
print("Setup: Cleaning test data")
print("-" * 80)

# Identifiers shared by every test section below.
test_newsletter_id = 'privacy-test-newsletter'
test_email = 'privacy-test@example.com'
test_email_opted_out = 'opted-out@example.com'

# Tracking collections are keyed by newsletter id, subscriber collections
# by email address.
for collection in (newsletter_sends_collection, link_clicks_collection):
    collection.delete_many({'newsletter_id': test_newsletter_id})
for collection in (subscriber_activity_collection, subscribers_collection):
    collection.delete_many({'email': {'$in': [test_email, test_email_opted_out]}})
print("✓ Test data cleaned")
# Test 1: Data Anonymization
print("\n" + "-" * 80)
print("Test 1: Data Anonymization")
print("-" * 80)

try:
    # Fixtures on both sides of the 90-day retention window.
    old_date = datetime.utcnow() - timedelta(days=100)
    newsletter_sends_collection.insert_one({
        'newsletter_id': test_newsletter_id,
        'subscriber_email': 'old-user@example.com',
        'tracking_id': 'old-tracking-id-1',
        'sent_at': old_date,
        'opened': True,
        'first_opened_at': old_date,
        'last_opened_at': old_date,
        'open_count': 3,
        'created_at': old_date,
    })
    link_clicks_collection.insert_one({
        'tracking_id': 'old-link-tracking-id-1',
        'newsletter_id': test_newsletter_id,
        'subscriber_email': 'old-user@example.com',
        'article_url': 'https://example.com/old-article',
        'article_title': 'Old Article',
        'clicked': True,
        'clicked_at': old_date,
        'created_at': old_date,
    })

    recent_date = datetime.utcnow() - timedelta(days=30)
    newsletter_sends_collection.insert_one({
        'newsletter_id': test_newsletter_id,
        'subscriber_email': 'recent-user@example.com',
        'tracking_id': 'recent-tracking-id-1',
        'sent_at': recent_date,
        'opened': True,
        'first_opened_at': recent_date,
        'last_opened_at': recent_date,
        'open_count': 1,
        'created_at': recent_date,
    })

    # NOTE(review): this call is not scoped to the fixtures; presumably it
    # also anonymizes any other record older than 90 days. Confirm the
    # script only runs against a disposable database.
    result = anonymize_old_tracking_data(retention_days=90)

    # The old send must have had its email replaced.
    old_send = newsletter_sends_collection.find_one({'tracking_id': 'old-tracking-id-1'})
    test_result(
        "Anonymizes old newsletter records",
        old_send and old_send['subscriber_email'] == 'anonymized',
        f"Email: {old_send.get('subscriber_email', 'N/A') if old_send else 'N/A'}",
    )

    # The old link click must have had its email replaced too.
    old_click = link_clicks_collection.find_one({'tracking_id': 'old-link-tracking-id-1'})
    test_result(
        "Anonymizes old link click records",
        old_click and old_click['subscriber_email'] == 'anonymized',
        f"Email: {old_click.get('subscriber_email', 'N/A') if old_click else 'N/A'}",
    )

    # The counters on the anonymized send must be unchanged.
    test_result(
        "Preserves aggregated metrics",
        (
            old_send and
            old_send['open_count'] == 3 and
            old_send['opened'] == True
        ),
        f"Open count: {old_send.get('open_count', 0) if old_send else 0}",
    )

    # The 30-day-old send must keep its email.
    recent_send = newsletter_sends_collection.find_one({'tracking_id': 'recent-tracking-id-1'})
    test_result(
        "Does not anonymize recent records",
        (
            recent_send and
            recent_send['subscriber_email'] == 'recent-user@example.com'
        ),
        f"Email: {recent_send.get('subscriber_email', 'N/A') if recent_send else 'N/A'}",
    )

    # Counts are lower bounds because non-fixture records may be included.
    sends_anonymized = result['newsletter_sends_anonymized']
    clicks_anonymized = result['link_clicks_anonymized']
    test_result(
        "Returns correct anonymization counts",
        sends_anonymized >= 1 and clicks_anonymized >= 1,
        f"Newsletter: {sends_anonymized}, Links: {clicks_anonymized}",
    )
except Exception as exc:
    test_result("Data anonymization", False, f"Error: {exc!s}")
    import traceback
    traceback.print_exc()
# Test 2: User Data Deletion
print("\n" + "-" * 80)
print("Test 2: User Data Deletion")
print("-" * 80)

try:
    # Tracking records for the test subscriber, created through the service.
    tracking_data = create_newsletter_tracking(
        newsletter_id=test_newsletter_id,
        subscriber_email=test_email,
        article_links=[
            {'url': 'https://example.com/article1', 'title': 'Article 1'},
            {'url': 'https://example.com/article2', 'title': 'Article 2'},
        ],
    )

    # An activity record for the same subscriber, written directly.
    subscriber_activity_collection.insert_one({
        'email': test_email,
        'status': 'active',
        'last_opened_at': datetime.utcnow(),
        'total_opens': 5,
        'total_clicks': 3,
    })

    # Baseline: all three collections must hold something to delete.
    sends_before = newsletter_sends_collection.count_documents({'subscriber_email': test_email})
    links_before = link_clicks_collection.count_documents({'subscriber_email': test_email})
    activity_before = subscriber_activity_collection.count_documents({'email': test_email})
    test_result(
        "Creates test tracking records",
        sends_before > 0 and links_before > 0 and activity_before > 0,
        f"Newsletter: {sends_before}, Links: {links_before}, Activity: {activity_before}",
    )

    delete_result = delete_subscriber_tracking_data(test_email)

    # Nothing may remain for the subscriber afterwards.
    sends_after = newsletter_sends_collection.count_documents({'subscriber_email': test_email})
    links_after = link_clicks_collection.count_documents({'subscriber_email': test_email})
    activity_after = subscriber_activity_collection.count_documents({'email': test_email})
    test_result(
        "Deletes all tracking records",
        sends_after == 0 and links_after == 0 and activity_after == 0,
        f"Remaining - Newsletter: {sends_after}, Links: {links_after}, Activity: {activity_after}",
    )

    # The reported counts must equal the baseline counts.
    counts_match = (
        delete_result['newsletter_sends_deleted'] == sends_before and
        delete_result['link_clicks_deleted'] == links_before and
        delete_result['subscriber_activity_deleted'] == activity_before
    )
    test_result(
        "Returns correct deletion counts",
        counts_match,
        f"Deleted - Newsletter: {delete_result['newsletter_sends_deleted']}, Links: {delete_result['link_clicks_deleted']}, Activity: {delete_result['subscriber_activity_deleted']}",
    )
except Exception as exc:
    test_result("User data deletion", False, f"Error: {exc!s}")
    import traceback
    traceback.print_exc()
# Test 3: Tracking Opt-Out
print("\n" + "-" * 80)
print("Test 3: Tracking Opt-Out")
print("-" * 80)

try:
    # Create subscriber with tracking disabled
    subscribers_collection.insert_one({
        'email': test_email_opted_out,
        'subscribed_at': datetime.utcnow(),
        'tracking_enabled': False
    })

    # Try to create tracking for opted-out subscriber
    article_links = [
        {'url': 'https://example.com/article1', 'title': 'Article 1'}
    ]
    tracking_data_opted_out = create_newsletter_tracking(
        newsletter_id=test_newsletter_id,
        subscriber_email=test_email_opted_out,
        article_links=article_links
    )

    # Check that no tracking was created
    no_pixel_id = tracking_data_opted_out['pixel_tracking_id'] is None
    test_result("Does not create pixel tracking for opted-out users", no_pixel_id,
                f"Pixel ID: {tracking_data_opted_out['pixel_tracking_id']}")

    empty_link_map = len(tracking_data_opted_out['link_tracking_map']) == 0
    test_result("Does not create link tracking for opted-out users", empty_link_map,
                f"Link map size: {len(tracking_data_opted_out['link_tracking_map'])}")

    tracking_disabled_flag = tracking_data_opted_out.get('tracking_enabled') == False
    test_result("Returns tracking_enabled=False for opted-out users", tracking_disabled_flag)

    # Verify no database records were created
    newsletter_count = newsletter_sends_collection.count_documents({'subscriber_email': test_email_opted_out})
    link_count = link_clicks_collection.count_documents({'subscriber_email': test_email_opted_out})
    no_db_records = newsletter_count == 0 and link_count == 0
    test_result("Does not create database records for opted-out users", no_db_records,
                f"Newsletter records: {newsletter_count}, Link records: {link_count}")

    # Test opt-in/opt-out endpoints
    with app.test_client() as client:
        # Create a subscriber with tracking enabled
        subscribers_collection.insert_one({
            'email': test_email,
            'subscribed_at': datetime.utcnow(),
            'tracking_enabled': True
        })

        # Opt out
        response = client.post(f'/api/tracking/subscriber/{test_email}/opt-out')
        # A missing or malformed JSON body becomes {} so the check fails
        # cleanly instead of raising and skipping the checks that follow.
        payload = response.get_json(silent=True) or {}
        opt_out_success = response.status_code == 200 and payload.get('success') == True
        test_result("Opt-out endpoint works", opt_out_success,
                    f"Status: {response.status_code}")

        # Verify tracking is disabled
        subscriber = subscribers_collection.find_one({'email': test_email})
        tracking_disabled = subscriber and subscriber.get('tracking_enabled') == False
        test_result("Opt-out disables tracking in database", tracking_disabled)

        # Opt back in
        response = client.post(f'/api/tracking/subscriber/{test_email}/opt-in')
        payload = response.get_json(silent=True) or {}
        opt_in_success = response.status_code == 200 and payload.get('success') == True
        test_result("Opt-in endpoint works", opt_in_success,
                    f"Status: {response.status_code}")

        # Verify tracking is enabled
        subscriber = subscribers_collection.find_one({'email': test_email})
        tracking_enabled = subscriber and subscriber.get('tracking_enabled') == True
        test_result("Opt-in enables tracking in database", tracking_enabled)
except Exception as e:
    test_result("Tracking opt-out", False, f"Error: {str(e)}")
    import traceback
    traceback.print_exc()
# Test 4: Privacy API Endpoints
print("\n" + "-" * 80)
print("Test 4: Privacy API Endpoints")
print("-" * 80)

try:
    with app.test_client() as client:
        # Create test tracking data
        article_links = [{'url': 'https://example.com/test', 'title': 'Test'}]
        create_newsletter_tracking(
            newsletter_id=test_newsletter_id,
            subscriber_email='api-test@example.com',
            article_links=article_links
        )

        # Test deletion endpoint
        response = client.delete('/api/tracking/subscriber/api-test@example.com')
        # A missing or malformed JSON body becomes {} so each check fails
        # on its own instead of raising and skipping the checks that follow.
        payload = response.get_json(silent=True) or {}
        delete_endpoint_works = response.status_code == 200 and payload.get('success') == True
        test_result("Deletion endpoint returns success", delete_endpoint_works,
                    f"Status: {response.status_code}")

        # Verify data was deleted
        remaining_records = newsletter_sends_collection.count_documents({'subscriber_email': 'api-test@example.com'})
        data_deleted = remaining_records == 0
        test_result("Deletion endpoint removes data", data_deleted,
                    f"Remaining records: {remaining_records}")

        # Test anonymization endpoint
        # NOTE(review): the request is not scoped to the fixtures; presumably
        # it anonymizes every record older than 90 days. Confirm the script
        # only runs against a disposable database.
        response = client.post('/api/tracking/anonymize', json={'retention_days': 90})
        payload = response.get_json(silent=True) or {}
        anonymize_endpoint_works = response.status_code == 200 and payload.get('success') == True
        test_result("Anonymization endpoint returns success", anonymize_endpoint_works,
                    f"Status: {response.status_code}")

        has_counts = 'anonymized_counts' in payload
        test_result("Anonymization endpoint returns counts", has_counts)
except Exception as e:
    test_result("Privacy API endpoints", False, f"Error: {str(e)}")
    import traceback
    traceback.print_exc()
# Clean up test data
print("\n" + "-" * 80)
print("Cleaning up test data...")
print("-" * 80)

try:
    newsletter_sends_collection.delete_many({'newsletter_id': test_newsletter_id})
    link_clicks_collection.delete_many({'newsletter_id': test_newsletter_id})
    subscriber_activity_collection.delete_many({'email': {'$in': [test_email, test_email_opted_out, 'api-test@example.com']}})
    subscribers_collection.delete_many({'email': {'$in': [test_email, test_email_opted_out, 'api-test@example.com']}})

    # Clean up anonymized fixtures. The filter is restricted to the tracking
    # ids this script inserted: matching on subscriber_email == 'anonymized'
    # alone would delete every anonymized record in the collection.
    newsletter_sends_collection.delete_many({
        'subscriber_email': 'anonymized',
        'tracking_id': 'old-tracking-id-1',
    })
    link_clicks_collection.delete_many({
        'subscriber_email': 'anonymized',
        'tracking_id': 'old-link-tracking-id-1',
    })
    print("✓ Test data cleaned up")
except Exception as e:
    # Best effort: a failed cleanup is reported but does not fail the run.
    print(f"⚠ Error cleaning up: {str(e)}")
# Summary
total_checks = tests_passed + tests_failed
print("\n" + "=" * 80)
print("TEST SUMMARY")
print("=" * 80)
print(f"Total tests: {total_checks}")
print(f"✓ Passed: {tests_passed}")
print(f"❌ Failed: {tests_failed}")
if tests_failed:
    print(f"\n{tests_failed} test(s) failed")
else:
    print("\n🎉 All privacy compliance tests passed!")
print("=" * 80 + "\n")

# Exit status 0 only when every check passed, so callers can gate on it.
sys.exit(1 if tests_failed else 0)

View File

@@ -0,0 +1,128 @@
#!/usr/bin/env python
"""
Test RSS feed URL extraction
Run from backend directory with venv activated:
cd backend
source venv/bin/activate # or venv\\Scripts\\activate on Windows
python test_rss_extraction.py
"""
from pymongo import MongoClient
from config import Config
import feedparser
from utils.rss_utils import extract_article_url, extract_article_summary, extract_published_date
print("\n" + "=" * 80)
print("RSS Feed URL Extraction Test")
print("=" * 80)

# Connect to database
# NOTE(review): the URI is printed verbatim; if it can embed credentials,
# consider masking it before logging.
print(f"\nConnecting to MongoDB: {Config.MONGODB_URI}")
client = MongoClient(Config.MONGODB_URI)
db = client[Config.DB_NAME]

# Get RSS feeds
print("Fetching RSS feeds from database...")
feeds = list(db['rss_feeds'].find())
if not feeds:
    print("\n❌ No RSS feeds in database!")
    print("\nAdd a feed first:")
    print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
    print(" -H 'Content-Type: application/json' \\")
    print(" -d '{\"name\": \"Süddeutsche Politik\", \"url\": \"https://rss.sueddeutsche.de/rss/Politik\"}'")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # site module and is not guaranteed to exist in every interpreter setup.
    raise SystemExit(1)
print(f"✓ Found {len(feeds)} feed(s)\n")
# Test each feed
# Totals of entries whose URL could / could not be extracted; read by the
# summary section after the loop.
total_success = 0
total_fail = 0

for feed_record in feeds:
    feed_name = feed_record.get('name', 'Unknown')
    feed_url = feed_record.get('url', '')
    is_active = feed_record.get('active', True)

    print("\n" + "=" * 80)
    print(f"Feed: {feed_name}")
    print(f"URL: {feed_url}")
    print(f"Active: {'Yes' if is_active else 'No'}")
    print("=" * 80)

    if not is_active:
        print("⏭ Skipping (inactive)")
        continue

    try:
        # Parse RSS
        print("\nFetching RSS feed...")
        parsed = feedparser.parse(feed_url)
        if not parsed.entries:
            print("❌ No entries found in feed")
            continue
        print(f"✓ Found {len(parsed.entries)} entries")

        # Only the first three entries of each feed are sampled.
        print("\nTesting first 3 entries:")
        print("-" * 80)
        for position, entry in enumerate(parsed.entries[:3], start=1):
            print(f"\n📰 Entry {position}:")

            # Title, truncated to 65 characters for display.
            headline = entry.get('title', 'No title')
            print(f" Title: {headline[:65]}")

            # URL extraction is the only step that feeds the totals.
            article_url = extract_article_url(entry)
            if not article_url:
                # Dump the candidate fields to help diagnose the miss.
                print(" ❌ Could not extract URL")
                print(f" Available fields: {list(entry.keys())[:10]}")
                print(f" link: {entry.get('link', 'N/A')}")
                print(f" guid: {entry.get('guid', 'N/A')}")
                print(f" id: {entry.get('id', 'N/A')}")
                total_fail += 1
            else:
                print(f" ✓ URL: {article_url}")
                total_success += 1

            # Summary and date are informational only.
            summary = extract_article_summary(entry)
            print(f" ✓ Summary: {summary[:70]}..." if summary else " ⚠ No summary")

            pub_date = extract_published_date(entry)
            print(f" ✓ Date: {pub_date}" if pub_date else " ⚠ No date")
    except Exception as exc:
        # One broken feed must not stop the remaining feeds from being tested.
        print(f"❌ Error: {exc}")
        import traceback
        traceback.print_exc()
# Summary
urls_tested = total_success + total_fail
print("\n" + "=" * 80)
print("SUMMARY")
print("=" * 80)
print(f"Total URLs tested: {urls_tested}")
print(f"✓ Successfully extracted: {total_success}")
print(f"❌ Failed to extract: {total_fail}")
if total_fail:
    print(f"\n{total_fail} URL(s) could not be extracted")
    print("Check the output above for details")
else:
    # Every sampled entry yielded a URL: point at the next step.
    print("\n🎉 All URLs extracted successfully!")
    print("\nYou can now run the crawler:")
    print(" cd ../news_crawler")
    print(" pip install -r requirements.txt")
    print(" python crawler_service.py 5")
print("=" * 80 + "\n")

View File

@@ -0,0 +1,260 @@
#!/usr/bin/env python
"""
Test email tracking functionality
Run from backend directory with venv activated:
cd backend
source venv/bin/activate # or venv\\Scripts\\activate on Windows
python test_tracking.py
"""
import sys
import os
from datetime import datetime
from pymongo import MongoClient
# Add backend directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from config import Config
from services.tracking_service import generate_tracking_id, create_newsletter_tracking
from database import newsletter_sends_collection, link_clicks_collection
from app import app
print("\n" + "="*80)
print("Email Tracking System Tests")
print("="*80)
# Test counters
tests_passed = 0
tests_failed = 0
def test_result(test_name, passed, message=""):
"""Print test result"""
global tests_passed, tests_failed
if passed:
tests_passed += 1
print(f"{test_name}")
if message:
print(f" {message}")
else:
tests_failed += 1
print(f"{test_name}")
if message:
print(f" {message}")
# Test 1: Tracking ID Generation
print("\n" + "-"*80)
print("Test 1: Tracking ID Generation")
print("-"*80)

try:
    tracking_id = generate_tracking_id()

    # Shape check only: 36 characters containing four hyphens (UUID4 layout)
    has_uuid_shape = len(tracking_id) == 36 and tracking_id.count('-') == 4
    test_result(
        "Generate tracking ID",
        has_uuid_shape,
        f"Generated ID: {tracking_id}",
    )

    # A second call must not return the same ID as the first
    second_id = generate_tracking_id()
    test_result(
        "Tracking IDs are unique",
        tracking_id != second_id,
        f"ID1: {tracking_id[:8]}... ID2: {second_id[:8]}...",
    )
except Exception as e:
    test_result("Generate tracking ID", False, f"Error: {str(e)}")
# Test 2: Create Newsletter Tracking
print("\n" + "-"*80)
print("Test 2: Create Newsletter Tracking")
print("-"*80)

try:
    # Remove leftovers from earlier runs so the record counts below are exact
    newsletter_sends_collection.delete_many({'newsletter_id': 'test-newsletter-001'})
    link_clicks_collection.delete_many({'newsletter_id': 'test-newsletter-001'})

    # Create tracking for one subscriber with two article links.
    # `tracking_data` is read again by the endpoint tests further down.
    tracking_data = create_newsletter_tracking(
        newsletter_id='test-newsletter-001',
        subscriber_email='test@example.com',
        article_links=[
            {'url': 'https://example.com/article1', 'title': 'Test Article 1'},
            {'url': 'https://example.com/article2', 'title': 'Test Article 2'},
        ],
    )

    # Shape of the returned data
    test_result("Returns pixel tracking ID", 'pixel_tracking_id' in tracking_data)
    test_result("Returns link tracking map", 'link_tracking_map' in tracking_data)

    link_map_size = len(tracking_data.get('link_tracking_map', {}))
    test_result(
        "Creates tracking for all links",
        link_map_size == 2,
        f"Created {link_map_size} link tracking records",
    )

    # The newsletter_sends document must exist ...
    newsletter_record = newsletter_sends_collection.find_one(
        {'tracking_id': tracking_data['pixel_tracking_id']}
    )
    test_result("Creates newsletter_sends record", newsletter_record is not None)

    # ... and start out unopened
    if newsletter_record:
        starts_unopened = (
            newsletter_record['opened'] == False and
            newsletter_record['open_count'] == 0 and
            newsletter_record['first_opened_at'] is None
        )
        test_result("Newsletter record has correct initial state", starts_unopened)

    # One link_clicks document per article link
    link_record_count = len(
        list(link_clicks_collection.find({'newsletter_id': 'test-newsletter-001'}))
    )
    test_result(
        "Creates link_clicks records",
        link_record_count == 2,
        f"Created {link_record_count} link click records",
    )
except Exception as e:
    test_result("Create newsletter tracking", False, f"Error: {str(e)}")
    import traceback
    traceback.print_exc()
# Test 3: Tracking Pixel Endpoint
print("\n" + "-"*80)
print("Test 3: Tracking Pixel Endpoint")
print("-"*80)

try:
    with app.test_client() as client:
        # Uses the tracking data created by Test 2
        pixel_tracking_id = tracking_data['pixel_tracking_id']
        pixel_path = f'/api/track/pixel/{pixel_tracking_id}'

        # First open with a valid tracking ID
        first_response = client.get(pixel_path)
        test_result(
            "Returns PNG for valid tracking_id",
            first_response.content_type == 'image/png',
            f"Content-Type: {first_response.content_type}",
        )
        test_result(
            "Returns 200 status",
            first_response.status_code == 200,
            f"Status: {first_response.status_code}",
        )

        # The open must have been written to the database
        after_first_open = newsletter_sends_collection.find_one(
            {'tracking_id': pixel_tracking_id}
        )
        was_logged = (
            after_first_open and
            after_first_open['opened'] == True and
            after_first_open['open_count'] == 1 and
            after_first_open['first_opened_at'] is not None
        )
        test_result(
            "Logs email open event",
            was_logged,
            f"Open count: {after_first_open.get('open_count', 0) if after_first_open else 0}",
        )

        # Second open: counter increments and last_opened_at moves on
        client.get(pixel_path)
        after_second_open = newsletter_sends_collection.find_one(
            {'tracking_id': pixel_tracking_id}
        )
        handles_multiple = (
            after_second_open and
            after_second_open['open_count'] == 2 and
            after_second_open['last_opened_at'] != after_second_open['first_opened_at']
        )
        test_result(
            "Handles multiple opens",
            handles_multiple,
            f"Open count: {after_second_open.get('open_count', 0) if after_second_open else 0}",
        )

        # Unknown tracking ID: the endpoint is still expected to serve the pixel
        unknown_response = client.get('/api/track/pixel/invalid-tracking-id-12345')
        fails_silently = (
            unknown_response.status_code == 200
            and unknown_response.content_type == 'image/png'
        )
        test_result("Returns PNG for invalid tracking_id (fails silently)", fails_silently)
except Exception as e:
    test_result("Tracking pixel endpoint", False, f"Error: {str(e)}")
    import traceback
    traceback.print_exc()
# Test 4: Link Redirect Endpoint
print("\n" + "-"*80)
print("Test 4: Link Redirect Endpoint")
print("-"*80)

try:
    with app.test_client() as client:
        # Look up the tracking ID created for the first article in Test 2
        article_url = 'https://example.com/article1'
        link_tracking_id = tracking_data['link_tracking_map'][article_url]

        # Valid tracking ID: expect a 302 to the original article
        click_response = client.get(
            f'/api/track/click/{link_tracking_id}',
            follow_redirects=False,
        )
        test_result(
            "Returns 302 redirect",
            click_response.status_code == 302,
            f"Status: {click_response.status_code}",
        )
        test_result(
            "Redirects to correct URL",
            click_response.location == article_url,
            f"Location: {click_response.location}",
        )

        # The click must have been written to the database
        click_record = link_clicks_collection.find_one({'tracking_id': link_tracking_id})
        was_logged = (
            click_record and
            click_record['clicked'] == True and
            click_record['clicked_at'] is not None
        )
        test_result("Logs click event", was_logged)

        # Unknown tracking ID: the endpoint is still expected to redirect
        unknown_response = client.get(
            '/api/track/click/invalid-tracking-id-12345',
            follow_redirects=False,
        )
        test_result(
            "Redirects on invalid tracking_id",
            unknown_response.status_code == 302,
            f"Redirects to: {unknown_response.location}",
        )
except Exception as e:
    test_result("Link redirect endpoint", False, f"Error: {str(e)}")
    import traceback
    traceback.print_exc()
# Clean up test data
print("\n" + "-"*80)
print("Cleaning up test data...")
print("-"*80)

try:
    # Both collections hold documents keyed by the test newsletter ID
    for collection in (newsletter_sends_collection, link_clicks_collection):
        collection.delete_many({'newsletter_id': 'test-newsletter-001'})
    print("✓ Test data cleaned up")
except Exception as e:
    print(f"⚠ Error cleaning up: {str(e)}")

# Summary
print("\n" + "="*80)
print("TEST SUMMARY")
print("="*80)
print(f"Total tests: {tests_passed + tests_failed}")
print(f"✓ Passed: {tests_passed}")
print(f"❌ Failed: {tests_failed}")

if tests_failed:
    print(f"\n{tests_failed} test(s) failed")
else:
    print("\n🎉 All tests passed!")
print("="*80 + "\n")

# Exit status mirrors the outcome: 0 when nothing failed, 1 otherwise
sys.exit(1 if tests_failed else 0)

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python
"""
Test script to verify crawler functionality
"""
from crawler_service import extract_article_content, get_active_rss_feeds
import sys
def test_content_extraction():
    """Fetch one sample page and report whether content could be extracted.

    Calls ``extract_article_content`` against a fixed news URL with a
    10-second timeout and prints what came back.

    Returns:
        True when the extractor returned a truthy result, False otherwise.
    """
    print("Testing content extraction...")
    # Test with a simple news site
    test_url = "https://www.bbc.com/news"
    print(f"Extracting content from: {test_url}")

    result = extract_article_content(test_url, timeout=10)
    if not result:
        print("✗ Content extraction failed")
        return False

    # `or` also covers keys that are present but hold None, which would
    # otherwise break the slice / len() below.
    title = result.get('title') or 'N/A'
    content = result.get('content') or ''

    print("✓ Content extraction successful!")
    print(f" Title: {title[:50]}...")
    print(f" Content length: {len(content)} chars")
    print(f" Word count: {result.get('word_count', 0)}")
    return True
def test_database_connection():
    """Check that active RSS feeds can be read and list them.

    Returns:
        True when ``get_active_rss_feeds`` returned without raising,
        False when any exception occurred.
    """
    print("\nTesting database connection...")
    try:
        feeds = get_active_rss_feeds()
        print(f"✓ Database connection successful!")
        print(f" Found {len(feeds)} active RSS feed(s)")
        if feeds:
            print("\n Active feeds:")
            for feed in feeds:
                # .get() so a feed document missing a field is listed with a
                # placeholder instead of raising KeyError and being reported
                # below as a database connection failure.
                print(f" - {feed.get('name', 'Unknown')}: {feed.get('url', 'N/A')}")
        else:
            print("\n ⚠ No active feeds found. Add feeds via the backend API:")
            print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
            print(" -H 'Content-Type: application/json' \\")
            print(" -d '{\"name\": \"Test Feed\", \"url\": \"https://example.com/rss\"}'")
        return True
    except Exception as e:
        print(f"✗ Database connection failed: {e}")
        return False
def main():
    """Run both crawler checks and return a process exit code.

    Returns:
        0 when the database check and the extraction check both pass,
        1 otherwise.
    """
    rule = "="*60
    print(rule)
    print("News Crawler - Test Suite")
    print(rule + "\n")

    # Database first, then content extraction
    db_ok = test_database_connection()
    extract_ok = test_content_extraction()

    print("\n" + rule)
    print("Test Results:")
    print(f" Database Connection: {'✓ PASS' if db_ok else '✗ FAIL'}")
    print(f" Content Extraction: {'✓ PASS' if extract_ok else '✗ FAIL'}")
    print(rule + "\n")

    if not (db_ok and extract_ok):
        print("✗ Some tests failed. Please check the errors above.")
        return 1

    print("✓ All tests passed! Crawler is ready to use.")
    print("\nRun the crawler with:")
    print(" python crawler_service.py")
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@@ -0,0 +1,129 @@
#!/usr/bin/env python
"""
Test script for Ollama integration
Tests connection, configuration, and summarization
"""
from config import Config
from ollama_client import OllamaClient
print("\n" + "="*70)
print("Ollama Integration Test")
print("="*70)

# Print configuration
Config.print_config()

# Validate configuration; issues are reported but do not stop the run
issues = Config.validate()
if issues:
    print("⚠ Configuration Issues:")
    for issue in issues:
        print(f" - {issue}")
    print()

# Initialize client from the loaded configuration
client = OllamaClient(
    base_url=Config.OLLAMA_BASE_URL,
    model=Config.OLLAMA_MODEL,
    api_key=Config.OLLAMA_API_KEY,
    enabled=Config.OLLAMA_ENABLED,
    timeout=Config.OLLAMA_TIMEOUT
)

# Test 1: Check if Ollama is enabled
print("Test 1: Configuration Check")
print(f" Ollama Enabled: {Config.OLLAMA_ENABLED}")
if not Config.OLLAMA_ENABLED:
    print(" ⚠ Ollama is disabled. Set OLLAMA_ENABLED=true in .env to enable.")
    print("\n" + "="*70)
    # SystemExit instead of the site-provided exit(), which is intended for
    # the interactive shell and is absent when Python runs without `site`.
    raise SystemExit(0)

# Test 2: Test connection
print("\nTest 2: Connection Test")
conn_result = client.test_connection()
print(f" Available: {conn_result['available']}")
print(f" Current Model: {conn_result['current_model']}")
if conn_result['available']:
    print(f" ✓ Connected to Ollama server")
    if conn_result['models']:
        # Only the first five models are listed to keep the output short
        print(f" Available models: {', '.join(conn_result['models'][:5])}")
        if conn_result['current_model'] not in conn_result['models']:
            print(f" ⚠ Warning: Model '{conn_result['current_model']}' not found in available models")
else:
    print(f" ✗ Connection failed: {conn_result['error']}")
    print("\n" + "="*70)
    # Nothing further can be tested without a server connection
    raise SystemExit(1)
# Test 3: Test summarization with sample article
print("\nTest 3: Summarization Test")
print(" Testing with sample German article...")
sample_article = """
Die neue U-Bahn-Linie, die das Münchner Stadtzentrum mit dem Flughafen verbindet, wurde heute eröffnet.
Oberbürgermeister Dieter Reiter nahm zusammen mit hunderten Anwohnern an der Eröffnungszeremonie teil.
Die Linie wird die Reisezeit zwischen dem Flughafen und der Münchner Innenstadt erheblich verkürzen.
Der Bau dauerte fünf Jahre und kostete etwa 2 Milliarden Euro. Die neue Linie umfasst 10 Stationen
und verkehrt während der Hauptverkehrszeiten alle 10 Minuten. Experten erwarten, dass die neue Verbindung
den Verkehr in der Stadt deutlich entlasten wird. Die Münchner Verkehrsgesellschaft rechnet mit täglich
über 50.000 Fahrgästen auf der neuen Strecke.
"""
result = client.summarize_article(sample_article, max_words=Config.SUMMARY_MAX_WORDS)
print(f"\n Success: {result['success']}")
if result['success']:
print(f" ✓ Summarization successful!")
print(f"\n Original word count: {result['original_word_count']}")
print(f" Summary word count: {result['summary_word_count']}")
print(f" Compression ratio: {result['original_word_count'] / max(result['summary_word_count'], 1):.1f}x")
print(f" Duration: {result['duration']:.2f}s")
print(f"\n Summary (English):")
print(f" {'-'*70}")
print(f" {result['summary']}")
print(f" {'-'*70}")
else:
print(f" ✗ Summarization failed: {result['error']}")
# Test 4: Test with English article
print("\nTest 4: English Article Test")
print(" Testing with English article...")

english_article = """
The city council approved a new bike lane network spanning 50 kilometers across Munich.
The project aims to promote sustainable transportation and reduce car traffic in the city center.
Construction will begin next month and is expected to be completed within two years.
The bike lanes will connect major residential areas with business districts and public transport hubs.
Environmental groups have praised the initiative as a significant step toward carbon neutrality.
"""

# `result2` is read again by the final summary section
result2 = client.summarize_article(english_article, max_words=50)
print(f"\n Success: {result2['success']}")

if not result2['success']:
    print(f" ✗ Summarization failed: {result2['error']}")
else:
    print(f" ✓ Summarization successful!")
    print(f" Original: {result2['original_word_count']} words → Summary: {result2['summary_word_count']} words")
    print(f" Duration: {result2['duration']:.2f}s")
    print(f"\n Summary:")
    print(f" {result2['summary']}")
# Summary
def _mark(ok):
    """Return the pass/fail glyph for one summary line."""
    return "✓" if ok else "✗"


# Each status is derived from what the earlier sections actually recorded, so
# a failing step is no longer printed with a success marker.
config_ok = not issues
connection_ok = bool(conn_result['available'])
german_ok = bool(result['success'])
english_ok = bool(result2['success'])

print("\n" + "="*70)
print("Test Summary")
print("="*70)
print(f"{_mark(config_ok)} Configuration: {'Valid' if config_ok else 'Issues found'}")
print(f"{_mark(connection_ok)} Connection: {'Success' if connection_ok else 'Failed'}")
print(f"{_mark(german_ok)} German→English: {'Success' if german_ok else 'Failed'}")
print(f"{_mark(english_ok)} English→English: {'Success' if english_ok else 'Failed'}")
print("="*70)

if german_ok and english_ok:
    print("\n🎉 All tests passed! Ollama integration is working correctly.")
    print("\nYou can now run the crawler with AI summarization:")
    print(" python crawler_service.py 5")
else:
    print("\n⚠ Some tests failed. Check the errors above.")
print()

# Non-zero exit status so shells and CI can detect a failed summarization
if not (german_ok and english_ok):
    raise SystemExit(1)

View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python
"""
Test script to verify RSS feed URL extraction
Tests actual feeds from the database
"""
import feedparser
from pymongo import MongoClient
import os
from dotenv import load_dotenv
from rss_utils import extract_article_url, extract_article_summary, extract_published_date
# Load environment variables from the .env one directory above this script.
# The path is anchored to this file rather than the current working directory
# so the script finds the same .env regardless of where it is launched from.
_ENV_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '.env')
load_dotenv(dotenv_path=_ENV_PATH)

# MongoDB setup: connection string comes from the environment, with a local
# default; feeds are read from the `rss_feeds` collection.
MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
DB_NAME = 'munich_news'
client = MongoClient(MONGODB_URI)
db = client[DB_NAME]
rss_feeds_collection = db['rss_feeds']
def test_feed(feed_name, feed_url, max_entries=5):
    """Parse one RSS feed and check URL extraction on its first entries.

    Args:
        feed_name: Display name of the feed, used only in the output.
        feed_url: URL handed to ``feedparser.parse``.
        max_entries: How many leading entries to sample (default 5).

    Returns:
        True when a URL was extracted from every sampled entry; False when
        the feed has no entries, any sampled entry lacks a URL, or an
        exception occurred.
    """
    print(f"\n{'='*70}")
    print(f"Testing: {feed_name}")
    print(f"URL: {feed_url}")
    print('='*70)

    try:
        # Parse the feed
        print("Fetching RSS feed...")
        feed = feedparser.parse(feed_url)
        if not feed.entries:
            print("❌ No entries found in feed")
            return False
        print(f"✓ Found {len(feed.entries)} entries\n")

        # Sample the leading entries; a feed may hold fewer than max_entries
        sample = feed.entries[:max_entries]
        success_count = 0
        fail_count = 0

        for i, entry in enumerate(sample, 1):
            print(f"\n--- Entry {i} ---")
            print(f"Title: {entry.get('title', 'No title')[:60]}")

            # Test URL extraction
            article_url = extract_article_url(entry)
            if article_url:
                print(f"✓ URL: {article_url}")
                success_count += 1
            else:
                # Dump the candidate fields to help diagnose the feed format
                print(f"❌ No valid URL found")
                print(f" Available fields: {list(entry.keys())}")
                print(f" link: {entry.get('link', 'N/A')}")
                print(f" guid: {entry.get('guid', 'N/A')}")
                print(f" id: {entry.get('id', 'N/A')}")
                fail_count += 1

            # Test summary extraction (informational, does not affect result)
            summary = extract_article_summary(entry)
            if summary:
                print(f"✓ Summary: {summary[:80]}...")
            else:
                print(f"⚠ No summary found")

            # Test date extraction (informational, does not affect result)
            pub_date = extract_published_date(entry)
            if pub_date:
                print(f"✓ Published: {pub_date}")
            else:
                print(f"⚠ No published date found")

        # Report against the number of entries actually sampled rather than
        # a fixed 5, which was wrong for short feeds.
        sampled = len(sample)
        print(f"\n{'='*70}")
        print(f"Results for {feed_name}:")
        print(f" ✓ Success: {success_count}/{sampled}")
        print(f" ❌ Failed: {fail_count}/{sampled}")
        print('='*70)
        return fail_count == 0
    except Exception as e:
        print(f"❌ Error testing feed: {e}")
        return False
def main():
    """Test URL extraction for every active feed stored in the database.

    Returns:
        0 when at least one feed was tested and all tested feeds passed;
        1 when the collection is empty, no feed was tested, or any tested
        feed failed.
    """
    print("\n" + "="*70)
    print("RSS Feed URL Extraction Test")
    print("="*70)

    # Get all RSS feeds from database
    print("\nFetching RSS feeds from database...")
    feeds = list(rss_feeds_collection.find())
    if not feeds:
        print("❌ No RSS feeds found in database")
        print("\nAdd feeds using:")
        print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
        print(" -H 'Content-Type: application/json' \\")
        print(" -d '{\"name\": \"Feed Name\", \"url\": \"https://example.com/rss\"}'")
        return 1
    print(f"✓ Found {len(feeds)} feed(s) in database\n")

    # Test each feed; inactive feeds are skipped and not counted
    results = {}
    for feed in feeds:
        feed_name = feed.get('name', 'Unknown')
        feed_url = feed.get('url', '')
        active = feed.get('active', True)

        if not active:
            print(f"\n⏭ Skipping inactive feed: {feed_name}")
            continue
        if not feed_url:
            print(f"\n❌ Feed '{feed_name}' has no URL")
            results[feed_name] = False
            continue
        results[feed_name] = test_feed(feed_name, feed_url)

    # Summary
    print("\n" + "="*70)
    print("FINAL RESULTS")
    print("="*70)
    for feed_name, success in results.items():
        status = "✓ PASS" if success else "❌ FAIL"
        print(f"{status} - {feed_name}")

    total = len(results)
    passed = sum(1 for s in results.values() if s)
    print(f"\nTotal: {passed}/{total} feeds passed")
    print("="*70 + "\n")

    # With every feed inactive nothing was verified; do not report success
    if total == 0:
        print("⚠ No active feeds were tested. Activate at least one feed and re-run.")
        return 1

    if passed == total:
        print("✓ All feeds are working correctly!")
        print("\nYou can now run the crawler:")
        print(" python crawler_service.py")
        return 0
    else:
        print("⚠ Some feeds have issues. Check the output above.")
        return 1


if __name__ == '__main__':
    import sys
    sys.exit(main())

View File

@@ -0,0 +1,208 @@
#!/usr/bin/env python
"""
Integration test for newsletter with tracking.
Tests the full flow of generating a newsletter with tracking enabled.
"""
import sys
from pathlib import Path
from datetime import datetime
# Add backend directory to path
backend_dir = Path(__file__).parent.parent / 'backend'
sys.path.insert(0, str(backend_dir))
# Mock the tracking service to avoid database dependency
class MockTrackingService:
    """In-memory stand-in for the tracking service; performs no database I/O."""

    @staticmethod
    def create_newsletter_tracking(newsletter_id, subscriber_email, article_links=None):
        """Build deterministic tracking data for one newsletter send.

        Args:
            newsletter_id: Echoed back in the result.
            subscriber_email: Echoed back in the result.
            article_links: Optional sequence of dicts, each with a 'url' key.

        Returns:
            Dict with a fixed 'pixel_tracking_id', a 'link_tracking_map'
            from each article URL to ``mock-link-<position>``, and the two
            identifiers passed in.
        """
        # A missing or empty list yields an empty map
        numbered_links = enumerate(article_links or [])
        return {
            'pixel_tracking_id': 'mock-pixel-123',
            'link_tracking_map': {
                article['url']: f"mock-link-{position}"
                for position, article in numbered_links
            },
            'newsletter_id': newsletter_id,
            'subscriber_email': subscriber_email
        }
# Import after setting up path
from tracking_integration import inject_tracking_pixel, replace_article_links, generate_tracking_urls
from jinja2 import Template
def test_newsletter_with_tracking():
    """Render a newsletter with tracking enabled and verify the result.

    Steps: generate tracking data via the mock service, render
    ``newsletter_template.html`` (located next to this script), inject the
    tracking pixel, replace article links with tracking URLs, check for the
    privacy notice, and write the final HTML to
    ``test_newsletter_with_tracking.html`` in the current directory.

    Returns:
        True when every check passed, False otherwise. Returns False early
        if the tracking pixel is missing after injection.
    """
    print("\n" + "="*70)
    print("NEWSLETTER TRACKING INTEGRATION TEST")
    print("="*70)

    # Mock article data
    articles = [
        {
            'title': 'Munich Tech Summit Announces 2025 Dates',
            'author': 'Tech Reporter',
            'link': 'https://example.com/tech-summit',
            'summary': 'The annual Munich Tech Summit will return in 2025 with exciting new features.',
            'source': 'Munich Tech News',
            'published_at': datetime.now()
        },
        {
            'title': 'New Public Transport Routes Launched',
            'author': 'Transport Desk',
            'link': 'https://example.com/transport-routes',
            'summary': 'MVG announces three new bus routes connecting suburban areas.',
            'source': 'Munich Transport',
            'published_at': datetime.now()
        }
    ]

    # Configuration
    newsletter_id = 'test-newsletter-2025-11-11'
    subscriber_email = 'test@example.com'
    api_url = 'http://localhost:5001'
    print(f"\nNewsletter ID: {newsletter_id}")
    print(f"Subscriber: {subscriber_email}")
    print(f"Articles: {len(articles)}")
    print(f"API URL: {api_url}")

    # Step 1: Generate tracking URLs
    print("\n" + "-"*70)
    print("Step 1: Generate tracking data")
    print("-"*70)
    tracking_data = generate_tracking_urls(
        articles=articles,
        newsletter_id=newsletter_id,
        subscriber_email=subscriber_email,
        tracking_service=MockTrackingService
    )
    print(f"✓ Pixel tracking ID: {tracking_data['pixel_tracking_id']}")
    print(f"✓ Link tracking map: {len(tracking_data['link_tracking_map'])} links")
    for url, tracking_id in tracking_data['link_tracking_map'].items():
        # Separator added: URL and tracking ID were printed run together
        print(f" - {url} → {tracking_id}")

    # Step 2: Load and render template
    print("\n" + "-"*70)
    print("Step 2: Render newsletter template")
    print("-"*70)
    template_path = Path(__file__).parent / 'newsletter_template.html'
    with open(template_path, 'r', encoding='utf-8') as f:
        template_content = f.read()
    template = Template(template_content)
    now = datetime.now()
    template_data = {
        'date': now.strftime('%A, %B %d, %Y'),
        'year': now.year,
        'article_count': len(articles),
        'articles': articles,
        'unsubscribe_link': 'http://localhost:3000/unsubscribe',
        'website_link': 'http://localhost:3000',
        'tracking_enabled': True
    }
    html = template.render(**template_data)
    print("✓ Template rendered")

    # Step 3: Inject tracking pixel
    print("\n" + "-"*70)
    print("Step 3: Inject tracking pixel")
    print("-"*70)
    html = inject_tracking_pixel(
        html,
        tracking_data['pixel_tracking_id'],
        api_url
    )
    pixel_url = f"{api_url}/api/track/pixel/{tracking_data['pixel_tracking_id']}"
    if pixel_url in html:
        print(f"✓ Tracking pixel injected: {pixel_url}")
    else:
        print(f"✗ Tracking pixel NOT found")
        return False

    # Step 4: Replace article links
    print("\n" + "-"*70)
    print("Step 4: Replace article links with tracking URLs")
    print("-"*70)
    html = replace_article_links(
        html,
        tracking_data['link_tracking_map'],
        api_url
    )

    # Verify all article links were replaced
    success = True
    for article in articles:
        original_url = article['link']
        tracking_id = tracking_data['link_tracking_map'].get(original_url)
        if not tracking_id:
            # Previously skipped silently, letting an untracked article pass
            print(f"✗ No tracking ID generated for: {original_url}")
            success = False
            continue

        tracking_url = f"{api_url}/api/track/click/{tracking_id}"
        if tracking_url in html:
            print(f"✓ Link replaced: {original_url}")
            print(f" → {tracking_url}")
        else:
            print(f"✗ Link NOT replaced: {original_url}")
            success = False

        # Verify original URL is NOT in the HTML (should be replaced)
        if f'href="{original_url}"' in html:
            print(f"✗ Original URL still present: {original_url}")
            success = False

    # Step 5: Verify privacy notice
    print("\n" + "-"*70)
    print("Step 5: Verify privacy notice")
    print("-"*70)
    if "This email contains tracking to measure engagement" in html:
        print("✓ Privacy notice present in footer")
    else:
        print("✗ Privacy notice NOT found")
        success = False

    # Step 6: Save output for inspection
    print("\n" + "-"*70)
    print("Step 6: Save test output")
    print("-"*70)
    output_file = 'test_newsletter_with_tracking.html'
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html)
    print(f"✓ Test newsletter saved to: {output_file}")
    print(f" Open it in your browser to inspect the tracking integration")
    return success
if __name__ == '__main__':
    # Run the single integration test and map its result to an exit status
    print("\n" + "="*70)
    print("TESTING NEWSLETTER WITH TRACKING")
    print("="*70)

    all_passed = test_newsletter_with_tracking()

    print("\n" + "="*70)
    print("✓ ALL TESTS PASSED" if all_passed else "✗ SOME TESTS FAILED")
    print("="*70 + "\n")
    sys.exit(0 if all_passed else 1)

View File

@@ -0,0 +1,179 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<title>Munich News Daily</title>
<!--[if mso]>
<style type="text/css">
body, table, td {font-family: Arial, Helvetica, sans-serif !important;}
</style>
<![endif]-->
</head>
<body style="margin: 0; padding: 0; background-color: #f4f4f4; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;">
<!-- Wrapper Table -->
<table border="0" cellpadding="0" cellspacing="0" role="presentation" style="background-color: #f4f4f4;" width="100%">
<tr>
<td align="center" style="padding: 20px 0;">
<!-- Main Container -->
<table border="0" cellpadding="0" cellspacing="0" role="presentation" style="background-color: #ffffff; max-width: 600px;" width="600">
<!-- Header -->
<tr>
<td style="background-color: #1a1a1a; padding: 30px 40px; text-align: center;">
<h1 style="margin: 0 0 8px 0; font-size: 28px; font-weight: 700; color: #ffffff; letter-spacing: -0.5px;">
Munich News Daily
</h1>
<p style="margin: 0; font-size: 14px; color: #999999; letter-spacing: 0.5px;">
Tuesday, November 11, 2025
</p>
</td>
</tr>
<!-- Greeting -->
<tr>
<td style="padding: 30px 40px 20px 40px;">
<p style="margin: 0; font-size: 16px; line-height: 1.5; color: #333333;">
Good morning ☀️
</p>
<p style="margin: 15px 0 0 0; font-size: 15px; line-height: 1.6; color: #666666;">
Here's what's happening in Munich today. We've summarized 2 stories using AI so you can stay informed in under 5 minutes.
</p>
</td>
</tr>
<!-- Divider -->
<tr>
<td style="padding: 0 40px;">
<div style="height: 1px; background-color: #e0e0e0;"></div>
</td>
</tr>
<!-- Articles -->
<tr>
<td style="padding: 25px 40px;">
<!-- Article Number Badge -->
<table border="0" cellpadding="0" cellspacing="0" role="presentation" width="100%">
<tr>
<td>
<span style="display: inline-block; background-color: #000000; color: #ffffff; width: 24px; height: 24px; line-height: 24px; text-align: center; border-radius: 50%; font-size: 12px; font-weight: 600;">
1
</span>
</td>
</tr>
</table>
<!-- Article Title -->
<h2 style="margin: 12px 0 8px 0; font-size: 19px; font-weight: 700; line-height: 1.3; color: #1a1a1a;">
Munich Tech Summit Announces 2025 Dates
</h2>
<!-- Article Meta -->
<p style="margin: 0 0 12px 0; font-size: 13px; color: #999999;">
<span style="color: #000000; font-weight: 600;">Munich Tech News</span>
<span> • Tech Reporter</span>
</p>
<!-- Article Summary -->
<p style="margin: 0 0 15px 0; font-size: 15px; line-height: 1.6; color: #333333;">
The annual Munich Tech Summit will return in 2025 with exciting new features.
</p>
<!-- Read More Link -->
<a href="http://localhost:5001/api/track/click/mock-link-0" style="display: inline-block; color: #000000; text-decoration: none; font-size: 14px; font-weight: 600; border-bottom: 2px solid #000000; padding-bottom: 2px;">
Read more →
</a>
</td>
</tr>
<!-- Article Divider -->
<tr>
<td style="padding: 0 40px;">
<div style="height: 1px; background-color: #f0f0f0;"></div>
</td>
</tr>
<tr>
<td style="padding: 25px 40px;">
<!-- Article Number Badge -->
<table border="0" cellpadding="0" cellspacing="0" role="presentation" width="100%">
<tr>
<td>
<span style="display: inline-block; background-color: #000000; color: #ffffff; width: 24px; height: 24px; line-height: 24px; text-align: center; border-radius: 50%; font-size: 12px; font-weight: 600;">
2
</span>
</td>
</tr>
</table>
<!-- Article Title -->
<h2 style="margin: 12px 0 8px 0; font-size: 19px; font-weight: 700; line-height: 1.3; color: #1a1a1a;">
New Public Transport Routes Launched
</h2>
<!-- Article Meta -->
<p style="margin: 0 0 12px 0; font-size: 13px; color: #999999;">
<span style="color: #000000; font-weight: 600;">Munich Transport</span>
<span> • Transport Desk</span>
</p>
<!-- Article Summary -->
<p style="margin: 0 0 15px 0; font-size: 15px; line-height: 1.6; color: #333333;">
MVG announces three new bus routes connecting suburban areas.
</p>
<!-- Read More Link -->
<a href="http://localhost:5001/api/track/click/mock-link-1" style="display: inline-block; color: #000000; text-decoration: none; font-size: 14px; font-weight: 600; border-bottom: 2px solid #000000; padding-bottom: 2px;">
Read more →
</a>
</td>
</tr>
<!-- Article Divider -->
<!-- Bottom Divider -->
<tr>
<td style="padding: 25px 40px 0 40px;">
<div style="height: 1px; background-color: #e0e0e0;"></div>
</td>
</tr>
<!-- Summary Box -->
<tr>
<td style="padding: 30px 40px;">
<table border="0" cellpadding="0" cellspacing="0" role="presentation" style="background-color: #f8f8f8; border-radius: 8px;" width="100%">
<tr>
<td style="padding: 25px; text-align: center;">
<p style="margin: 0 0 8px 0; font-size: 13px; color: #666666; text-transform: uppercase; letter-spacing: 1px; font-weight: 600;">
Today's Digest
</p>
<p style="margin: 0; font-size: 36px; font-weight: 700; color: #000000;">
2
</p>
<p style="margin: 8px 0 0 0; font-size: 14px; color: #666666;">
stories • AI-summarized • 5 min read
</p>
</td>
</tr>
</table>
</td>
</tr>
<!-- Footer -->
<tr>
<td style="background-color: #1a1a1a; padding: 30px 40px; text-align: center;">
<p style="margin: 0 0 15px 0; font-size: 14px; color: #ffffff; font-weight: 600;">
Munich News Daily
</p>
<p style="margin: 0 0 20px 0; font-size: 13px; color: #999999; line-height: 1.5;">
AI-powered news summaries for busy people.<br/>
Delivered daily to your inbox.
</p>
<!-- Footer Links -->
<p style="margin: 0; font-size: 12px; color: #666666;">
<a href="http://localhost:3000" style="color: #999999; text-decoration: none;">Visit Website</a>
<span style="color: #444444;"> • </span>
<a href="http://localhost:3000/unsubscribe" style="color: #999999; text-decoration: none;">Unsubscribe</a>
</p>
<!-- Privacy Notice -->
<p style="margin: 20px 0 0 0; font-size: 11px; color: #666666; line-height: 1.4;">
This email contains tracking to measure engagement and improve our content.<br/>
We respect your privacy and anonymize data after 90 days.
</p>
<p style="margin: 20px 0 0 0; font-size: 11px; color: #666666;">
© 2025 Munich News Daily. All rights reserved.
</p>
</td>
</tr>
</table>
<!-- End Main Container -->
</td>
</tr>
</table>
<!-- End Wrapper Table -->
<img alt="" height="1" src="http://localhost:5001/api/track/pixel/mock-pixel-123" style="display:block;" width="1"/></body>
</html>

View File

@@ -0,0 +1,187 @@
#!/usr/bin/env python
"""
Test script for tracking integration in newsletter sender.
Tests tracking pixel injection and link replacement.
"""
import sys
from pathlib import Path
# Add backend directory to path
backend_dir = Path(__file__).parent.parent / 'backend'
sys.path.insert(0, str(backend_dir))
from tracking_integration import inject_tracking_pixel, replace_article_links
def test_inject_tracking_pixel():
    """Check that ``inject_tracking_pixel`` adds the expected <img> tag.

    Returns:
        True when the exact expected pixel tag appears in the output HTML,
        False otherwise.
    """
    print("\n" + "="*70)
    print("TEST 1: Inject Tracking Pixel")
    print("="*70)

    # Inputs
    api_url = "http://localhost:5001"
    tracking_id = "test-tracking-123"
    html = """<html>
<body>
<p>Newsletter content</p>
</body>
</html>"""

    # Inject pixel
    result = inject_tracking_pixel(html, tracking_id, api_url)

    # The comparison is on the exact tag text, attribute order included
    expected_pixel = f'<img src="{api_url}/api/track/pixel/{tracking_id}" width="1" height="1" alt="" style="display:block;" />'
    if expected_pixel not in result:
        print("✗ Tracking pixel NOT found in HTML")
        print(f" Expected: {expected_pixel}")
        print(f" Result: {result}")
        return False

    print("✓ Tracking pixel correctly injected")
    print(f" Pixel URL: {api_url}/api/track/pixel/{tracking_id}")
    return True
def test_replace_article_links():
    """Check that mapped links are rewritten and unmapped links are kept.

    Returns:
        True when both mapped article URLs appear as tracking URLs and the
        unmapped URL is still present; False otherwise.
    """
    print("\n" + "="*70)
    print("TEST 2: Replace Article Links")
    print("="*70)

    api_url = "http://localhost:5001"

    # Two links have tracking IDs; the third is deliberately left out
    link_tracking_map = {
        "https://example.com/article1": "track-id-1",
        "https://example.com/article2": "track-id-2"
    }
    html = """<html>
<body>
<a href="https://example.com/article1">Article 1</a>
<a href="https://example.com/article2">Article 2</a>
<a href="https://example.com/untracked">Untracked Link</a>
</body>
</html>"""

    # Replace links
    result = replace_article_links(html, link_tracking_map, api_url)

    failures = 0

    # Check article 1 link
    expected_url_1 = f"{api_url}/api/track/click/track-id-1"
    if expected_url_1 not in result:
        print(f"✗ Article 1 link NOT replaced")
        failures += 1
    else:
        print(f"✓ Article 1 link replaced: {expected_url_1}")

    # Check article 2 link
    expected_url_2 = f"{api_url}/api/track/click/track-id-2"
    if expected_url_2 not in result:
        print(f"✗ Article 2 link NOT replaced")
        failures += 1
    else:
        print(f"✓ Article 2 link replaced: {expected_url_2}")

    # Check untracked link remains unchanged
    if "https://example.com/untracked" not in result:
        print(f"✗ Untracked link was modified (should remain unchanged)")
        failures += 1
    else:
        print(f"✓ Untracked link preserved: https://example.com/untracked")

    return failures == 0
def test_full_integration():
    """Run pixel injection followed by link replacement on one document.

    The HTML is first passed through ``inject_tracking_pixel`` and the result
    through ``replace_article_links``; the final markup is then searched for
    the pixel URL and the click-tracking URL. On success the final HTML is
    printed for inspection.

    Returns:
        bool: True when both the pixel URL and the tracking link URL are
        found in the final HTML, False otherwise.
    """
    print("\n" + "="*70)
    print("TEST 3: Full Integration (Pixel + Links)")
    print("="*70)

    api_url = "http://localhost:5001"
    pixel_tracking_id = "pixel-123"
    link_tracking_map = {
        "https://example.com/article": "link-456"
    }
    html = """<html>
<body>
<h1>Newsletter</h1>
<a href="https://example.com/article">Read Article</a>
</body>
</html>"""

    # Order matters: pixel injection runs before link rewriting
    html = replace_article_links(
        inject_tracking_pixel(html, pixel_tracking_id, api_url),
        link_tracking_map,
        api_url,
    )

    # (what is being looked for, URL that must appear in the final HTML)
    expectations = (
        ("pixel", f"{api_url}/api/track/pixel/{pixel_tracking_id}"),
        ("link", f"{api_url}/api/track/click/link-456"),
    )

    success = True
    for kind, url in expectations:
        if url not in html:
            print(f"✗ Tracking {kind} NOT found")
            success = False
        else:
            print(f"✓ Tracking {kind} present: {url}")

    if not success:
        return success

    print("\n✓ Full integration successful!")
    print("\nFinal HTML:")
    print("-" * 70)
    print(html)
    print("-" * 70)
    return success
if __name__ == '__main__':
    # Script entry point: run every test in order, print a summary table and
    # exit with status 0 only if all of them returned a truthy result.
    print("\n" + "="*70)
    print("TRACKING INTEGRATION TEST SUITE")
    print("="*70)

    # (label shown in the summary, test callable) in execution order
    suite = (
        ("Inject Tracking Pixel", test_inject_tracking_pixel),
        ("Replace Article Links", test_replace_article_links),
        ("Full Integration", test_full_integration),
    )
    results = [(label, run_test()) for label, run_test in suite]

    print("\n" + "="*70)
    print("TEST SUMMARY")
    print("="*70)

    total = len(results)
    passed = len([label for label, outcome in results if outcome])

    for label, outcome in results:
        if outcome:
            print(f"✓ PASS: {label}")
        else:
            print(f"✗ FAIL: {label}")

    print("-" * 70)
    print(f"Results: {passed}/{total} tests passed")
    print("="*70 + "\n")

    # Non-zero exit status signals at least one failed test
    if passed == total:
        sys.exit(0)
    sys.exit(1)

96
tests/test_feeds_quick.py Normal file
View File

@@ -0,0 +1,96 @@
#!/usr/bin/env python
"""
Quick test script - Run from project root with backend venv activated
Usage:
cd /path/to/munich-news
source backend/venv/bin/activate # or backend/venv/Scripts/activate on Windows
python tests/test_feeds_quick.py
"""
import sys
sys.path.insert(0, 'backend')
from pymongo import MongoClient
from config import Config
import feedparser
from utils.rss_utils import extract_article_url, extract_article_summary, extract_published_date
# Diagnostic script: load every RSS feed document from the `rss_feeds`
# collection, fetch each active feed with feedparser and print what the
# rss_utils extractors return for its first three entries.
print("="*80)
print("RSS Feed Test - Checking Database Feeds")
print("="*80)

# Connect to database. The connection is only needed to read the feed list,
# so it is closed as soon as that is done (also on error).
client = MongoClient(Config.MONGODB_URI)
try:
    db = client[Config.DB_NAME]
    # Get RSS feeds
    feeds = list(db['rss_feeds'].find())
finally:
    client.close()

if not feeds:
    # Nothing to test: show how to register a feed and exit with an error code
    print("\n❌ No RSS feeds in database!")
    print("\nAdd a feed first:")
    print(" curl -X POST http://localhost:5001/api/rss-feeds \\")
    print(" -H 'Content-Type: application/json' \\")
    print(" -d '{\"name\": \"Test Feed\", \"url\": \"https://rss.sueddeutsche.de/rss/Politik\"}'")
    sys.exit(1)

print(f"\n✓ Found {len(feeds)} feed(s) in database\n")

# Test each feed
for feed_doc in feeds:
    name = feed_doc.get('name', 'Unknown')
    url = feed_doc.get('url', '')
    active = feed_doc.get('active', True)  # a missing flag counts as active

    print(f"\n{'='*80}")
    print(f"Feed: {name}")
    print(f"URL: {url}")
    print(f"Active: {active}")
    print('='*80)

    if not active:
        print("⏭ Skipping (inactive)")
        continue

    # One broken feed must not stop the remaining feeds from being checked,
    # hence the broad per-feed handler below.
    try:
        # Parse RSS
        print("Fetching RSS feed...")
        feed = feedparser.parse(url)

        if not feed.entries:
            print("❌ No entries found")
            # feedparser records fetch/parse problems in `bozo` and
            # `bozo_exception` instead of raising, so surface the cause here.
            if feed.get('bozo'):
                print(f" Reason: {feed.get('bozo_exception')}")
            continue

        print(f"✓ Found {len(feed.entries)} entries\n")

        # Test first 3 entries
        for i, entry in enumerate(feed.entries[:3], 1):
            print(f"\n--- Entry {i} ---")
            title = entry.get('title', 'No title')
            print(f"Title: {title[:70]}")

            # Test URL extraction
            article_url = extract_article_url(entry)
            if article_url:
                print(f"✓ URL extracted: {article_url}")
            else:
                # Dump the raw fields to help debug the extractor
                print("❌ Could not extract URL")
                print(f" Available fields: {list(entry.keys())[:10]}")
                print(f" link: {entry.get('link', 'N/A')}")
                print(f" guid: {entry.get('guid', 'N/A')}")

            # Test summary
            summary = extract_article_summary(entry)
            if summary:
                print(f"✓ Summary: {summary[:80]}...")

            # Test date
            pub_date = extract_published_date(entry)
            if pub_date:
                print(f"✓ Date: {pub_date}")
    except Exception as e:
        print(f"❌ Error: {e}")

print("\n" + "="*80)
print("Test complete!")
print("="*80)