179 lines
5.8 KiB
Python
Executable File
179 lines
5.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Scheduled newsletter sender that runs daily at 7 AM Berlin time
|
|
Waits for crawler to finish before sending to ensure fresh content
|
|
"""
|
|
import schedule
|
|
import time
|
|
from datetime import datetime, timedelta
|
|
import pytz
|
|
from pathlib import Path
|
|
import sys
|
|
|
|
# Add the directory containing this script to the import path so sender_service can be imported
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from sender_service import send_newsletter, get_latest_articles, Config
|
|
|
|
# Berlin timezone
|
|
BERLIN_TZ = pytz.timezone('Europe/Berlin')
|
|
|
|
# Maximum time to wait for crawler (in minutes)
|
|
MAX_WAIT_TIME = 30
|
|
|
|
def check_crawler_finished():
    """
    Check if the crawler has finished by looking for a recently crawled article.

    Requests one article from the last 2 hours via ``get_latest_articles`` and
    compares its ``crawled_at`` value with the current UTC time.

    Returns:
        (bool, str) - (is_finished, message).
        ``is_finished`` is True only when an article with a ``crawled_at``
        timestamp less than 120 minutes old was found. Any exception raised
        while checking is caught and reported as ``(False, <error message>)``.
    """
    # Function-scope import: the module header only pulls in datetime/timedelta.
    from datetime import timezone

    # Keep in step with hours=2 passed to get_latest_articles below.
    recency_window_minutes = 120

    try:
        articles = get_latest_articles(max_articles=1, hours=2)
        if not articles:
            return False, "No recent articles found"

        crawled_at = articles[0].get('crawled_at')
        if not crawled_at:
            return False, "No recent articles found"

        # NOTE(review): crawled_at is presumably a datetime stored in UTC -
        # confirm against the crawler. A naive value is interpreted as UTC
        # (same result as the former utcnow() subtraction); an aware value is
        # compared directly instead of raising TypeError.
        if crawled_at.utcoffset() is None:
            crawled_at = crawled_at.replace(tzinfo=timezone.utc)

        elapsed = datetime.now(timezone.utc) - crawled_at
        minutes_since = elapsed.total_seconds() / 60

        if minutes_since < recency_window_minutes:
            return True, f"Crawler finished {int(minutes_since)} minutes ago"

        return False, "No recent articles found"

    except Exception as e:
        # Best-effort probe: a failing lookup must not crash the scheduler,
        # so the error is surfaced through the message instead.
        return False, f"Error checking crawler status: {e}"
|
|
|
|
|
|
def wait_for_crawler(max_wait_minutes=30):
    """
    Wait for crawler to finish before sending newsletter

    Polls ``check_crawler_finished`` every 30 seconds (or sooner when less
    than 30 seconds of the budget remain) and prints progress to stdout.
    The status is always checked at least once, even when
    ``max_wait_minutes`` is 0.

    Args:
        max_wait_minutes: Maximum time to wait in minutes

    Returns:
        bool: True if crawler finished, False if timeout
    """
    berlin_time = datetime.now(BERLIN_TZ)
    print("\n⏳ Waiting for crawler to finish...")
    print(f" Current time: {berlin_time.strftime('%H:%M:%S %Z')}")
    print(f" Max wait time: {max_wait_minutes} minutes")

    check_interval = 30  # Check every 30 seconds

    # Monotonic clock: the timeout must not shrink or stretch when the
    # system wall clock is adjusted while we are waiting.
    deadline = time.monotonic() + max_wait_minutes * 60

    while True:
        # Check if crawler finished
        is_finished, message = check_crawler_finished()

        if is_finished:
            print(f" ✓ {message}")
            return True

        # Check if we've exceeded max wait time
        remaining_seconds = deadline - time.monotonic()
        if remaining_seconds <= 0:
            print(f" ⚠ Timeout after {max_wait_minutes} minutes")
            print(" Proceeding with available articles...")
            return False

        # Show progress
        remaining = remaining_seconds / 60
        print(f" ⏳ Still waiting... ({remaining:.1f} minutes remaining) - {message}")

        # Wait before next check, but never sleep past the deadline
        time.sleep(min(check_interval, remaining_seconds))
|
|
|
|
|
|
def run_sender():
    """
    Run one newsletter send, coordinated with the crawler.

    Prints a start banner, waits up to MAX_WAIT_TIME minutes for the crawler
    via ``wait_for_crawler``, then calls ``send_newsletter`` and prints a
    summary of the returned result. Any exception raised along the way is
    printed together with its traceback instead of being propagated.
    """
    rule = '=' * 70
    started_at = datetime.now(BERLIN_TZ)

    print(f"\n{rule}")
    print("📧 Scheduled newsletter sender started")
    print(f" Time: {started_at.strftime('%Y-%m-%d %H:%M:%S %Z')}")
    print(f"{rule}\n")

    try:
        # A timeout is not fatal: the send goes ahead with whatever exists.
        crawler_done = wait_for_crawler(max_wait_minutes=MAX_WAIT_TIME)
        if not crawler_done:
            print("\n⚠ Crawler may still be running, but proceeding anyway...")

        print(f"\n{rule}")
        print("📧 Starting newsletter send...")
        print(f"{rule}\n")

        # Deliver to every subscriber, capped at the configured article count
        outcome = send_newsletter(max_articles=Config.MAX_ARTICLES)

        if not outcome['success']:
            print(f"\n{rule}")
            print(f"❌ Newsletter send failed: {outcome.get('error', 'Unknown error')}")
            print(f"{rule}\n")
        else:
            print(f"\n{rule}")
            print("✅ Newsletter sent successfully!")
            print(f" Sent: {outcome['sent_count']}/{outcome['total_subscribers']}")
            print(f" Articles: {outcome['article_count']}")
            print(f" Failed: {outcome['failed_count']}")
            print(f"{rule}\n")

    except Exception as exc:
        # Keep the scheduler alive: report the failure and return normally.
        print(f"\n{rule}")
        print(f"❌ Scheduled sender error: {exc}")
        print(f"{rule}\n")
        import traceback
        traceback.print_exc()
|
|
|
|
|
|
def main():
    """
    Main scheduler loop

    Registers ``run_sender`` as a daily job at 07:00 Europe/Berlin, prints
    the current time and the next scheduled run, then polls the scheduler
    once a minute. This function does not return; it ends only when an
    exception (such as KeyboardInterrupt) propagates out of the loop.
    """
    print("📧 Munich News Newsletter Scheduler")
    print("="*70)
    print("Schedule: Daily at 7:00 AM Berlin time")
    print("Timezone: Europe/Berlin (CET/CEST)")
    # Derived from the constant so the banner cannot drift from the real limit
    print(f"Coordination: Waits for crawler to finish (max {MAX_WAIT_TIME} min)")
    print("="*70)

    # Schedule the sender to run at 7 AM Berlin time.
    # Without an explicit timezone, schedule interprets "07:00" in the host's
    # local time, which is only Berlin time if the host happens to be
    # configured that way.
    daily_job = schedule.every().day
    try:
        daily_job = daily_job.at("07:00", BERLIN_TZ)
    except TypeError:
        # Older schedule releases have no tz parameter on at(); fall back to
        # the previous behaviour but make the limitation visible.
        print("⚠ Installed 'schedule' has no timezone support; 07:00 will be system local time")
        daily_job = daily_job.at("07:00")
    daily_job.do(run_sender)

    # Show next run time
    berlin_time = datetime.now(BERLIN_TZ)
    print(f"\nCurrent time (Berlin): {berlin_time.strftime('%Y-%m-%d %H:%M:%S %Z')}")

    # Get next scheduled run
    next_run = schedule.next_run()
    if next_run:
        # Convert to Berlin time for display (astimezone treats the naive
        # value returned by schedule as system local time)
        next_run_berlin = next_run.astimezone(BERLIN_TZ)
        print(f"Next scheduled run: {next_run_berlin.strftime('%Y-%m-%d %H:%M:%S %Z')}")

    print("\n⏳ Scheduler is running... (Press Ctrl+C to stop)\n")

    # Optional: Run immediately on startup (comment out if you don't want this)
    # print("🚀 Running initial send on startup...")
    # run_sender()

    # Keep the scheduler running
    while True:
        schedule.run_pending()
        time.sleep(60)  # Check every minute
|
|
|
|
|
|
# Script entry point: run the scheduler until it is interrupted or fails.
if __name__ == '__main__':
    import traceback

    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C is the documented way to stop the scheduler
        print("\n\n👋 Scheduler stopped by user")
    except Exception as exc:
        # Anything else is unexpected: report it with the full traceback
        print(f"\n\n❌ Scheduler error: {exc}")
        traceback.print_exc()
|