diff --git a/news_sender/sender_service.py b/news_sender/sender_service.py index ad43d2b..cfdc24d 100644 --- a/news_sender/sender_service.py +++ b/news_sender/sender_service.py @@ -38,6 +38,7 @@ class Config: # Newsletter MAX_ARTICLES = int(os.getenv('NEWSLETTER_MAX_ARTICLES', '10')) + HOURS_LOOKBACK = int(os.getenv('NEWSLETTER_HOURS_LOOKBACK', '24')) WEBSITE_URL = os.getenv('WEBSITE_URL', 'http://localhost:3000') @@ -48,19 +49,51 @@ articles_collection = db['articles'] subscribers_collection = db['subscribers'] -def get_latest_articles(max_articles=10): +def get_latest_articles(max_articles=10, hours=24): """ - Get latest articles with AI summaries from database + Get latest articles with AI summaries from database (from today only) + + Args: + max_articles: Maximum number of articles to return + hours: Number of hours to look back (default 24) Returns: - list: Articles with summaries + list: Articles with summaries published today """ - cursor = articles_collection.find( - {'summary': {'$exists': True, '$ne': None}} - ).sort('created_at', -1).limit(max_articles) + from datetime import timedelta + + # Calculate cutoff time (e.g., 24 hours ago) + cutoff_time = datetime.utcnow() - timedelta(hours=hours) + + # Get start of today (00:00:00 UTC) + today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) + + # Query for articles with summaries published today OR created today + # This ensures we only get fresh articles from today + cursor = articles_collection.find({ + 'summary': {'$exists': True, '$ne': None}, + '$or': [ + # Articles published today (if published_at is available) + {'published_at': {'$gte': today_start}}, + # Articles created today (fallback if published_at is missing) + {'created_at': {'$gte': today_start}} + ] + }).sort('created_at', -1).limit(max_articles) articles = [] for doc in cursor: + # Double-check the date to ensure it's from today + published_at = doc.get('published_at') + created_at = doc.get('created_at') + + # Skip if both dates are old (extra safety check) + if published_at and isinstance(published_at, datetime): + if published_at < today_start: + continue + elif created_at and isinstance(created_at, datetime): + if created_at < today_start: + continue + articles.append({ 'title': doc.get('title', ''), 'author': doc.get('author'), @@ -179,20 +212,23 @@ def send_newsletter(max_articles=None, test_email=None): 'error': 'Email credentials not configured' } - # Get articles + # Get articles from today only max_articles = max_articles or Config.MAX_ARTICLES - print(f"\nFetching latest {max_articles} articles with AI summaries...") - articles = get_latest_articles(max_articles) + today_date = datetime.now().strftime('%B %d, %Y') + print(f"\nFetching articles published TODAY ({today_date})...") + print(f" Max articles: {max_articles}") + articles = get_latest_articles(max_articles, hours=Config.HOURS_LOOKBACK) if not articles: - print("❌ No articles with summaries found") - print(" Run the crawler with Ollama enabled first") + print("❌ No articles from today with summaries found") + print(f" No articles published today ({today_date})") + print(" Run the crawler with Ollama enabled to get fresh content") return { 'success': False, - 'error': 'No articles with summaries' + 'error': f'No articles published today' } - print(f"✓ Found {len(articles)} articles") + print(f"✓ Found {len(articles)} recent article(s)") # Get subscribers if test_email: @@ -255,21 +291,24 @@ def send_newsletter(max_articles=None, test_email=None): } -def preview_newsletter(max_articles=None): +def preview_newsletter(max_articles=None, hours=None): """ Generate newsletter HTML for preview (doesn't send) Args: max_articles: Maximum number of articles to include + hours: Hours to look back (default from config) Returns: str: HTML content """ max_articles = max_articles or Config.MAX_ARTICLES - articles = get_latest_articles(max_articles) + hours = hours or Config.HOURS_LOOKBACK + articles = get_latest_articles(max_articles, hours=hours) if not articles: - return "

No articles with summaries found

Run the crawler with Ollama enabled first.

" + today_date = datetime.now().strftime('%B %d, %Y') + return f"

No articles from today found

No articles published today ({today_date}). Run the crawler with Ollama enabled to get fresh content.

" return render_newsletter_html(articles)