Files
Munich-news/news_sender/scheduled_sender.py
2025-11-11 14:09:21 +01:00

179 lines
5.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Scheduled newsletter sender that runs daily at 7 AM Berlin time
Waits for crawler to finish before sending to ensure fresh content
"""
import schedule
import time
from datetime import datetime, timedelta
import pytz
from pathlib import Path
import sys
# Add current directory to path
sys.path.insert(0, str(Path(__file__).parent))
from sender_service import send_newsletter, get_latest_articles, Config
# Berlin timezone (pytz) used when formatting the timestamps this script prints
BERLIN_TZ = pytz.timezone('Europe/Berlin')
# Maximum time to wait for crawler (in minutes); run_sender() passes this
# value to wait_for_crawler() as max_wait_minutes
MAX_WAIT_TIME = 30
def check_crawler_finished():
    """
    Check if crawler has finished by looking for recent articles.

    Requests the single newest article of the last 2 hours from
    get_latest_articles() and inspects its 'crawled_at' timestamp.

    Returns:
        (bool, str) - (is_finished, message). is_finished is True only when
        the newest article has a 'crawled_at' value less than 120 minutes
        old. Any exception is reported through the message instead of being
        raised, so callers can keep polling.
    """
    # Local import: the module header only imports datetime and timedelta.
    from datetime import timezone

    try:
        articles = get_latest_articles(max_articles=1, hours=2)
        if articles:
            crawled_at = articles[0].get('crawled_at')
            if crawled_at:
                # Compare in timezone-aware UTC. A naive timestamp is taken to
                # be UTC (the assumption the previous utcnow() arithmetic made);
                # an aware one is used as-is. Subtracting naive from aware
                # datetimes would raise TypeError and make every check fail.
                if crawled_at.tzinfo is None:
                    crawled_at = crawled_at.replace(tzinfo=timezone.utc)
                time_since_crawl = datetime.now(timezone.utc) - crawled_at
                minutes_since = time_since_crawl.total_seconds() / 60
                if minutes_since < 120:  # Within last 2 hours
                    return True, f"Crawler finished {int(minutes_since)} minutes ago"
        return False, "No recent articles found"
    except Exception as e:
        # Deliberate best-effort: a failing lookup must not abort the wait loop.
        return False, f"Error checking crawler status: {e}"
def wait_for_crawler(max_wait_minutes=30):
    """
    Wait for crawler to finish before sending newsletter.

    Polls check_crawler_finished() every 30 seconds (sooner when less time
    than that is left) and prints progress until the crawler is reported
    finished or the time budget is used up.

    Args:
        max_wait_minutes: Maximum time to wait in minutes

    Returns:
        bool: True if crawler finished, False if timeout
    """
    berlin_time = datetime.now(BERLIN_TZ)
    print("\n⏳ Waiting for crawler to finish...")
    print(f" Current time: {berlin_time.strftime('%H:%M:%S %Z')}")
    print(f" Max wait time: {max_wait_minutes} minutes")

    check_interval = 30  # Seconds between checks
    # Monotonic clock: elapsed time is unaffected by system clock adjustments.
    start_time = time.monotonic()

    while True:
        # Check if crawler finished
        is_finished, message = check_crawler_finished()
        if is_finished:
            print(message)
            return True

        # Check if we've exceeded max wait time
        elapsed_minutes = (time.monotonic() - start_time) / 60
        if elapsed_minutes >= max_wait_minutes:
            print(f" ⚠ Timeout after {max_wait_minutes} minutes")
            print(" Proceeding with available articles...")
            return False

        # Show progress
        remaining = max_wait_minutes - elapsed_minutes
        print(f" ⏳ Still waiting... ({remaining:.1f} minutes remaining) - {message}")

        # Never sleep past the deadline; the last check happens right at it.
        time.sleep(min(check_interval, remaining * 60))
def run_sender():
    """
    Scheduled job: wait for the crawler, then send the newsletter.

    Waits up to MAX_WAIT_TIME minutes for the crawler, sends the newsletter
    with Config.MAX_ARTICLES articles either way, and prints a summary of the
    result. Exceptions raised while waiting or sending are printed together
    with a traceback instead of propagating to the scheduler loop.
    """
    banner = '=' * 70
    started_at = datetime.now(BERLIN_TZ)

    print(f"\n{banner}")
    print("📧 Scheduled newsletter sender started")
    print(f" Time: {started_at.strftime('%Y-%m-%d %H:%M:%S %Z')}")
    print(f"{banner}\n")

    try:
        # A timeout is not fatal: the send goes ahead with whatever is there.
        if not wait_for_crawler(max_wait_minutes=MAX_WAIT_TIME):
            print("\n⚠ Crawler may still be running, but proceeding anyway...")

        print(f"\n{banner}")
        print("📧 Starting newsletter send...")
        print(f"{banner}\n")

        # Send newsletter to all subscribers
        outcome = send_newsletter(max_articles=Config.MAX_ARTICLES)

        if not outcome['success']:
            print(f"\n{banner}")
            print(f"❌ Newsletter send failed: {outcome.get('error', 'Unknown error')}")
            print(f"{banner}\n")
            return

        print(f"\n{banner}")
        print("✅ Newsletter sent successfully!")
        print(f" Sent: {outcome['sent_count']}/{outcome['total_subscribers']}")
        print(f" Articles: {outcome['article_count']}")
        print(f" Failed: {outcome['failed_count']}")
        print(f"{banner}\n")
    except Exception as exc:
        print(f"\n{banner}")
        print(f"❌ Scheduled sender error: {exc}")
        print(f"{banner}\n")
        from traceback import print_exc
        print_exc()
def main():
    """
    Main scheduler loop.

    Prints a startup banner, registers run_sender as a daily job at 07:00
    Europe/Berlin time, shows the next scheduled run, and then loops forever,
    calling schedule.run_pending() once a minute. Does not return normally;
    stop it with Ctrl+C.
    """
    print("📧 Munich News Newsletter Scheduler")
    print("="*70)
    print("Schedule: Daily at 7:00 AM Berlin time")
    print("Timezone: Europe/Berlin (CET/CEST)")
    print(f"Coordination: Waits for crawler to finish (max {MAX_WAIT_TIME} min)")
    print("="*70)

    # Schedule the sender to run at 7 AM Berlin time. Without an explicit
    # timezone, schedule interprets "07:00" in the system's local time, which
    # is wrong on hosts that do not run in Europe/Berlin (e.g. UTC containers).
    try:
        job = schedule.every().day.at("07:00", BERLIN_TZ)
    except TypeError:
        # Older schedule releases have no tz parameter on at(); keep running
        # on local time rather than refusing to start.
        print("⚠ Installed 'schedule' has no timezone support; 07:00 uses the system's local time")
        job = schedule.every().day.at("07:00")
    job.do(run_sender)

    # Show current time
    berlin_time = datetime.now(BERLIN_TZ)
    print(f"\nCurrent time (Berlin): {berlin_time.strftime('%Y-%m-%d %H:%M:%S %Z')}")

    # Get next scheduled run
    next_run = schedule.next_run()
    if next_run:
        # next_run is a naive local-time datetime; astimezone() interprets it
        # as system local time and converts it to Berlin time for display.
        next_run_berlin = next_run.astimezone(BERLIN_TZ)
        print(f"Next scheduled run: {next_run_berlin.strftime('%Y-%m-%d %H:%M:%S %Z')}")

    print("\n⏳ Scheduler is running... (Press Ctrl+C to stop)\n")

    # Optional: Run immediately on startup (comment out if you don't want this)
    # print("🚀 Running initial send on startup...")
    # run_sender()

    # Keep the scheduler running
    while True:
        schedule.run_pending()
        time.sleep(60)  # Check every minute
# Script entry point: run the scheduler until it is interrupted or crashes.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C is the documented way to stop the scheduler.
        print("\n\n👋 Scheduler stopped by user")
    except Exception as error:
        print(f"\n\n❌ Scheduler error: {error}")
        from traceback import print_exc
        print_exc()