#!/usr/bin/env python3
"""
Scheduled newsletter sender that runs daily at 7 AM Berlin time.

Before sending, it polls for recently crawled articles so the newsletter
goes out with fresh content; if none show up within MAX_WAIT_TIME minutes
it sends anyway with whatever is available.
"""
import sys
import time
import traceback
from datetime import datetime, timedelta, timezone
from pathlib import Path

import pytz
import schedule

# Add current directory to path so sender_service resolves when the script
# is launched from another working directory.
sys.path.insert(0, str(Path(__file__).parent))

from sender_service import send_newsletter, get_latest_articles, Config

# Berlin timezone
BERLIN_TZ = pytz.timezone('Europe/Berlin')

# Wall-clock time (interpreted in BERLIN_TZ) at which the newsletter is sent
SEND_TIME = "07:00"

# Maximum time to wait for crawler (in minutes)
MAX_WAIT_TIME = 30

# An article crawled within this many hours counts as "the crawler has run".
# The same value is used for the article query and for the age check.
CRAWL_FRESHNESS_HOURS = 2


def check_crawler_finished():
    """
    Check if crawler has finished by looking for recent articles.

    Returns:
        (bool, str) - (is_finished, message). Any exception raised while
        checking is reported as (False, "Error checking crawler status: ...")
        rather than propagated, so the caller's polling loop keeps running.
    """
    try:
        articles = get_latest_articles(max_articles=1, hours=CRAWL_FRESHNESS_HOURS)

        if articles:
            crawled_at = articles[0].get('crawled_at')
            if crawled_at:
                # Naive timestamps are treated as UTC (the original code
                # subtracted them from utcnow()). Making both sides aware
                # also lets timezone-aware timestamps work instead of
                # raising TypeError on the subtraction.
                if crawled_at.tzinfo is None:
                    crawled_at = crawled_at.replace(tzinfo=timezone.utc)

                age = datetime.now(timezone.utc) - crawled_at
                if age < timedelta(hours=CRAWL_FRESHNESS_HOURS):
                    minutes_since = age.total_seconds() / 60
                    return True, f"Crawler finished {int(minutes_since)} minutes ago"

        return False, "No recent articles found"

    except Exception as e:
        # Deliberate best-effort: a failed lookup just means "not finished yet".
        return False, f"Error checking crawler status: {e}"


def wait_for_crawler(max_wait_minutes=30):
    """
    Wait for crawler to finish before sending newsletter.

    Polls check_crawler_finished() every 30 seconds. The status is always
    checked at least once, even when max_wait_minutes is 0 or negative.

    Args:
        max_wait_minutes: Maximum time to wait in minutes

    Returns:
        bool: True if crawler finished, False if timeout
    """
    berlin_time = datetime.now(BERLIN_TZ)
    print("\nā³ Waiting for crawler to finish...")
    print(f" Current time: {berlin_time.strftime('%H:%M:%S %Z')}")
    print(f" Max wait time: {max_wait_minutes} minutes")

    # Monotonic clock: immune to NTP corrections and manual clock changes,
    # which would otherwise stretch or cut short the wait.
    start_time = time.monotonic()
    check_interval = 30  # Check every 30 seconds

    while True:
        elapsed_minutes = (time.monotonic() - start_time) / 60

        # Check if crawler finished
        is_finished, message = check_crawler_finished()
        if is_finished:
            print(f" āœ“ {message}")
            return True

        # Check if we've exceeded max wait time
        if elapsed_minutes >= max_wait_minutes:
            print(f" ⚠ Timeout after {max_wait_minutes} minutes")
            print(" Proceeding with available articles...")
            return False

        # Show progress
        remaining = max_wait_minutes - elapsed_minutes
        print(f" ā³ Still waiting... ({remaining:.1f} minutes remaining) - {message}")

        # Wait before next check, but never sleep past the deadline.
        # `remaining` is strictly positive here because of the timeout check.
        time.sleep(min(check_interval, remaining * 60))


def run_sender():
    """
    Run the newsletter sender with crawler coordination.

    Waits up to MAX_WAIT_TIME minutes for fresh articles, then sends the
    newsletter regardless of whether the wait timed out. Every exception is
    caught and printed with a traceback so that one failed run does not
    terminate the scheduler loop that calls this function.
    """
    berlin_time = datetime.now(BERLIN_TZ)
    print(f"\n{'='*70}")
    print("šŸ“§ Scheduled newsletter sender started")
    print(f" Time: {berlin_time.strftime('%Y-%m-%d %H:%M:%S %Z')}")
    print(f"{'='*70}\n")

    try:
        # Wait for crawler to finish (at most MAX_WAIT_TIME minutes)
        crawler_finished = wait_for_crawler(max_wait_minutes=MAX_WAIT_TIME)

        if not crawler_finished:
            print("\n⚠ Crawler may still be running, but proceeding anyway...")

        print(f"\n{'='*70}")
        print("šŸ“§ Starting newsletter send...")
        print(f"{'='*70}\n")

        # Send newsletter to all subscribers
        result = send_newsletter(max_articles=Config.MAX_ARTICLES)

        if result['success']:
            print(f"\n{'='*70}")
            print("āœ… Newsletter sent successfully!")
            print(f" Sent: {result['sent_count']}/{result['total_subscribers']}")
            print(f" Articles: {result['article_count']}")
            print(f" Failed: {result['failed_count']}")
            print(f"{'='*70}\n")
        else:
            print(f"\n{'='*70}")
            print(f"āŒ Newsletter send failed: {result.get('error', 'Unknown error')}")
            print(f"{'='*70}\n")

    except Exception as e:
        # Top-level boundary for a scheduled job: report and keep the
        # scheduler alive for the next day's run.
        print(f"\n{'='*70}")
        print(f"āŒ Scheduled sender error: {e}")
        print(f"{'='*70}\n")
        traceback.print_exc()


def main():
    """
    Main scheduler loop.

    Registers run_sender as a daily job at SEND_TIME in Europe/Berlin,
    prints the next run time, then checks for pending jobs once a minute
    until interrupted.
    """
    print("šŸ“§ Munich News Newsletter Scheduler")
    print("="*70)
    print("Schedule: Daily at 7:00 AM Berlin time")
    print("Timezone: Europe/Berlin (CET/CEST)")
    print(f"Coordination: Waits for crawler to finish (max {MAX_WAIT_TIME} min)")
    print("="*70)

    # Schedule the sender to run at 7 AM Berlin time. The timezone must be
    # passed explicitly: without it `at()` interprets the time in the host's
    # local timezone (e.g. UTC in most containers), not in Berlin time.
    # NOTE: the `tz` argument of `at()` requires schedule >= 1.2.0.
    schedule.every().day.at(SEND_TIME, BERLIN_TZ).do(run_sender)

    # Show next run time
    berlin_time = datetime.now(BERLIN_TZ)
    print(f"\nCurrent time (Berlin): {berlin_time.strftime('%Y-%m-%d %H:%M:%S %Z')}")

    # Get next scheduled run
    next_run = schedule.next_run()
    if next_run:
        # schedule reports next_run as a naive datetime in the host's local
        # time; astimezone() on a naive value assumes local time, so this
        # converts it to Berlin time for display.
        next_run_berlin = next_run.astimezone(BERLIN_TZ)
        print(f"Next scheduled run: {next_run_berlin.strftime('%Y-%m-%d %H:%M:%S %Z')}")

    print("\nā³ Scheduler is running... (Press Ctrl+C to stop)\n")

    # Optional: Run immediately on startup (comment out if you don't want this)
    # print("šŸš€ Running initial send on startup...")
    # run_sender()

    # Keep the scheduler running
    while True:
        schedule.run_pending()
        time.sleep(60)  # Check every minute


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print("\n\nšŸ‘‹ Scheduler stopped by user")
    except Exception as e:
        print(f"\n\nāŒ Scheduler error: {e}")
        traceback.print_exc()