#!/usr/bin/env python3
"""
Scheduled crawler that runs daily at 6 AM Berlin time.

Running this module starts a blocking scheduler: it performs one crawl
immediately on startup and then one crawl per day at 06:00 Europe/Berlin
wall-clock time, until interrupted with Ctrl+C.
"""
import time
from datetime import datetime

import pytz
import schedule

from crawler_service import crawl_all_feeds

# Berlin timezone
BERLIN_TZ = pytz.timezone('Europe/Berlin')


def run_crawler():
    """Run one crawl over all feeds and print a start/finish banner.

    The crawl is limited to 20 articles per feed. Any exception raised while
    crawling or while reporting the result is caught and printed, so a failed
    run does not propagate to the caller (the scheduler loop in ``main``).
    """
    berlin_time = datetime.now(BERLIN_TZ)
    print(f"\n{'='*60}")
    print(f"🕐 Scheduled crawler started at {berlin_time.strftime('%Y-%m-%d %H:%M:%S %Z')}")
    print(f"{'='*60}\n")

    try:
        # Run crawler with max 20 articles per feed
        result = crawl_all_feeds(max_articles_per_feed=20)

        print(f"\n{'='*60}")
        print("✓ Scheduled crawler completed successfully")
        print(f"  Articles crawled: {result['total_articles_crawled']}")
        print(f"  Duration: {result['duration_seconds']}s")
        print(f"{'='*60}\n")

    except Exception as e:
        # Top-level boundary for a single run: report and keep the scheduler alive.
        print(f"\n{'='*60}")
        print(f"✗ Scheduled crawler failed: {e}")
        print(f"{'='*60}\n")


def _schedule_daily_crawl():
    """Register ``run_crawler`` to fire every day at 06:00 Europe/Berlin."""
    job = schedule.every().day
    try:
        # Without an explicit zone, "06:00" is interpreted in the host's local
        # time, which is only 6 AM in Berlin if the host runs on Berlin time.
        job = job.at("06:00", "Europe/Berlin")
    except TypeError:
        # Older schedule releases have no timezone parameter on at();
        # keep the previous behaviour instead of refusing to start.
        print("Warning: installed 'schedule' has no timezone support; "
              "06:00 will be interpreted in the host's local time")
        job = job.at("06:00")
    job.do(run_crawler)


def main():
    """Main scheduler loop.

    Prints a banner, registers the daily job, shows the current and next run
    time in Berlin time, runs one crawl immediately, and then polls the
    scheduler once per minute forever.
    """
    print("🤖 Munich News Crawler Scheduler")
    print("="*60)
    print("Schedule: Daily at 6:00 AM Berlin time")
    print("Timezone: Europe/Berlin (CET/CEST)")
    print("="*60)

    # Schedule the crawler to run at 6 AM Berlin time
    _schedule_daily_crawl()

    # Show next run time
    berlin_time = datetime.now(BERLIN_TZ)
    print(f"\nCurrent time (Berlin): {berlin_time.strftime('%Y-%m-%d %H:%M:%S %Z')}")

    # Get next scheduled run
    next_run = schedule.next_run()
    if next_run:
        # Convert to Berlin time for display.
        # NOTE(review): this relies on next_run being a naive host-local
        # datetime, which astimezone() treats as local time - confirm against
        # the installed schedule version.
        next_run_berlin = next_run.astimezone(BERLIN_TZ)
        print(f"Next scheduled run: {next_run_berlin.strftime('%Y-%m-%d %H:%M:%S %Z')}")

    print("\n⏳ Scheduler is running... (Press Ctrl+C to stop)\n")

    # Run immediately on startup (optional - comment out if you don't want this)
    print("🚀 Running initial crawl on startup...")
    run_crawler()

    # Keep the scheduler running
    while True:
        schedule.run_pending()
        time.sleep(60)  # Check every minute


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n👋 Scheduler stopped by user")
    except Exception as e:
        print(f"\n\n✗ Scheduler error: {e}")