Update newsletter to include only today's articles and add a NEWSLETTER_HOURS_LOOKBACK setting
This commit is contained in:
@@ -38,6 +38,7 @@ class Config:
|
|||||||
|
|
||||||
# Newsletter
|
# Newsletter
|
||||||
MAX_ARTICLES = int(os.getenv('NEWSLETTER_MAX_ARTICLES', '10'))
|
MAX_ARTICLES = int(os.getenv('NEWSLETTER_MAX_ARTICLES', '10'))
|
||||||
|
HOURS_LOOKBACK = int(os.getenv('NEWSLETTER_HOURS_LOOKBACK', '24'))
|
||||||
WEBSITE_URL = os.getenv('WEBSITE_URL', 'http://localhost:3000')
|
WEBSITE_URL = os.getenv('WEBSITE_URL', 'http://localhost:3000')
|
||||||
|
|
||||||
|
|
||||||
@@ -48,19 +49,51 @@ articles_collection = db['articles']
|
|||||||
subscribers_collection = db['subscribers']
|
subscribers_collection = db['subscribers']
|
||||||
|
|
||||||
|
|
||||||
def get_latest_articles(max_articles=10):
|
def get_latest_articles(max_articles=10, hours=24):
|
||||||
"""
|
"""
|
||||||
Get latest articles with AI summaries from database
|
Get latest articles with AI summaries from database (from today only)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
max_articles: Maximum number of articles to return
|
||||||
|
hours: Number of hours to look back (default 24). Note: the query below filters on the start of today (UTC), not on this value.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
list: Articles with summaries
|
list: Articles with summaries published today
|
||||||
"""
|
"""
|
||||||
cursor = articles_collection.find(
|
from datetime import timedelta
|
||||||
{'summary': {'$exists': True, '$ne': None}}
|
|
||||||
).sort('created_at', -1).limit(max_articles)
|
# Calculate cutoff time (e.g., 24 hours ago)
|
||||||
|
cutoff_time = datetime.utcnow() - timedelta(hours=hours)
|
||||||
|
|
||||||
|
# Get start of today (00:00:00 UTC)
|
||||||
|
today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
|
|
||||||
|
# Query for articles with summaries published today OR created today
|
||||||
|
# This ensures we only get fresh articles from today
|
||||||
|
cursor = articles_collection.find({
|
||||||
|
'summary': {'$exists': True, '$ne': None},
|
||||||
|
'$or': [
|
||||||
|
# Articles published today (if published_at is available)
|
||||||
|
{'published_at': {'$gte': today_start}},
|
||||||
|
# Articles created today (fallback if published_at is missing)
|
||||||
|
{'created_at': {'$gte': today_start}}
|
||||||
|
]
|
||||||
|
}).sort('created_at', -1).limit(max_articles)
|
||||||
|
|
||||||
articles = []
|
articles = []
|
||||||
for doc in cursor:
|
for doc in cursor:
|
||||||
|
# Double-check the date to ensure it's from today
|
||||||
|
published_at = doc.get('published_at')
|
||||||
|
created_at = doc.get('created_at')
|
||||||
|
|
||||||
|
# Skip if published_at (or created_at, when published_at is not a usable datetime) is before today (extra safety check)
|
||||||
|
if published_at and isinstance(published_at, datetime):
|
||||||
|
if published_at < today_start:
|
||||||
|
continue
|
||||||
|
elif created_at and isinstance(created_at, datetime):
|
||||||
|
if created_at < today_start:
|
||||||
|
continue
|
||||||
|
|
||||||
articles.append({
|
articles.append({
|
||||||
'title': doc.get('title', ''),
|
'title': doc.get('title', ''),
|
||||||
'author': doc.get('author'),
|
'author': doc.get('author'),
|
||||||
@@ -179,20 +212,23 @@ def send_newsletter(max_articles=None, test_email=None):
|
|||||||
'error': 'Email credentials not configured'
|
'error': 'Email credentials not configured'
|
||||||
}
|
}
|
||||||
|
|
||||||
# Get articles
|
# Get articles from today only
|
||||||
max_articles = max_articles or Config.MAX_ARTICLES
|
max_articles = max_articles or Config.MAX_ARTICLES
|
||||||
print(f"\nFetching latest {max_articles} articles with AI summaries...")
|
today_date = datetime.now().strftime('%B %d, %Y')
|
||||||
articles = get_latest_articles(max_articles)
|
print(f"\nFetching articles published TODAY ({today_date})...")
|
||||||
|
print(f" Max articles: {max_articles}")
|
||||||
|
articles = get_latest_articles(max_articles, hours=Config.HOURS_LOOKBACK)
|
||||||
|
|
||||||
if not articles:
|
if not articles:
|
||||||
print("❌ No articles with summaries found")
|
print("❌ No articles from today with summaries found")
|
||||||
print(" Run the crawler with Ollama enabled first")
|
print(f" No articles published today ({today_date})")
|
||||||
|
print(" Run the crawler with Ollama enabled to get fresh content")
|
||||||
return {
|
return {
|
||||||
'success': False,
|
'success': False,
|
||||||
'error': 'No articles with summaries'
|
'error': f'No articles published today'
|
||||||
}
|
}
|
||||||
|
|
||||||
print(f"✓ Found {len(articles)} articles")
|
print(f"✓ Found {len(articles)} recent article(s)")
|
||||||
|
|
||||||
# Get subscribers
|
# Get subscribers
|
||||||
if test_email:
|
if test_email:
|
||||||
@@ -255,21 +291,24 @@ def send_newsletter(max_articles=None, test_email=None):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def preview_newsletter(max_articles=None):
|
def preview_newsletter(max_articles=None, hours=None):
|
||||||
"""
|
"""
|
||||||
Generate newsletter HTML for preview (doesn't send)
|
Generate newsletter HTML for preview (doesn't send)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
max_articles: Maximum number of articles to include
|
max_articles: Maximum number of articles to include
|
||||||
|
hours: Hours to look back (default from config)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: HTML content
|
str: HTML content
|
||||||
"""
|
"""
|
||||||
max_articles = max_articles or Config.MAX_ARTICLES
|
max_articles = max_articles or Config.MAX_ARTICLES
|
||||||
articles = get_latest_articles(max_articles)
|
hours = hours or Config.HOURS_LOOKBACK
|
||||||
|
articles = get_latest_articles(max_articles, hours=hours)
|
||||||
|
|
||||||
if not articles:
|
if not articles:
|
||||||
return "<h1>No articles with summaries found</h1><p>Run the crawler with Ollama enabled first.</p>"
|
today_date = datetime.now().strftime('%B %d, %Y')
|
||||||
|
return f"<h1>No articles from today found</h1><p>No articles published today ({today_date}). Run the crawler with Ollama enabled to get fresh content.</p>"
|
||||||
|
|
||||||
return render_newsletter_html(articles)
|
return render_newsletter_html(articles)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user