#!/usr/bin/env python3
"""
Scheduled transport crawler that runs daily at 6 AM Berlin time.

On startup the crawler is run once immediately; afterwards the process stays
alive and triggers one crawl per day at 06:00 Europe/Berlin wall-clock time,
independent of the timezone the host itself is configured with.
"""

import time
from datetime import datetime

import pytz
import schedule

from crawler_service import run_crawler

# Berlin timezone
BERLIN_TZ = pytz.timezone('Europe/Berlin')

# Wall-clock time (interpreted in BERLIN_TZ) at which the daily crawl fires.
_DAILY_RUN_TIME = "06:00"

# Timestamp layout shared by every time printed by this script.
_TIME_FORMAT = '%Y-%m-%d %H:%M:%S %Z'


def _format_berlin(moment: datetime) -> str:
    """Render ``moment`` as a Berlin-local timestamp string.

    A naive datetime is interpreted as system local time (the documented
    behaviour of ``datetime.astimezone``) before being converted, so the
    result always carries a real timezone abbreviation for ``%Z``.
    """
    return moment.astimezone(BERLIN_TZ).strftime(_TIME_FORMAT)


def _schedule_daily_crawl() -> None:
    """Register the daily crawl job at ``_DAILY_RUN_TIME`` Berlin time."""
    job = schedule.every().day
    try:
        # Without the tz argument `schedule` interprets the time in the
        # host's local timezone, which is only correct if the host itself
        # runs on Europe/Berlin.
        job.at(_DAILY_RUN_TIME, BERLIN_TZ).do(run_transport_crawler)
    except TypeError:
        # `schedule` releases before 1.2.0 do not accept a timezone in
        # `.at()`. Keep the service running with the previous behaviour
        # rather than crashing, but make the limitation visible.
        print("Warning: installed 'schedule' has no timezone support; "
              "falling back to system local time for the daily run")
        job.at(_DAILY_RUN_TIME).do(run_transport_crawler)


def run_transport_crawler() -> None:
    """Run the transport crawler and log the execution.

    Any exception raised while crawling or while reading the result summary
    is caught and printed, so a single failed run does not stop the
    scheduler loop.
    """
    berlin_time = datetime.now(BERLIN_TZ)
    print(f"\n{'='*60}")
    print(f"🕐 Scheduled transport crawler started at {berlin_time.strftime(_TIME_FORMAT)}")
    print(f"{'='*60}\n")

    try:
        # Run crawler
        result = run_crawler()

        print(f"\n{'='*60}")
        print("✓ Scheduled transport crawler completed successfully")
        print(f" Total disruptions: {result['total_disruptions']}")
        print(f" MVG disruptions: {result['mvg_disruptions']}")
        print(f" S-Bahn disruptions: {result['sbahn_disruptions']}")
        print(f"{'='*60}\n")

    except Exception as e:
        # Top-level boundary of a scheduled job: report and keep the
        # scheduler alive for the next run.
        print(f"\n{'='*60}")
        print(f"✗ Scheduled transport crawler failed: {e}")
        print(f"{'='*60}\n")


def main() -> None:
    """Main scheduler loop.

    Prints a banner, registers the daily job, runs one crawl immediately and
    then polls the scheduler once per minute until interrupted.
    """
    print("🚇 Munich Transport Crawler Scheduler")
    print("="*60)
    print("Schedule: Daily at 6:00 AM Berlin time")
    print("Timezone: Europe/Berlin (CET/CEST)")
    print("="*60)

    # Schedule the crawler to run at 6 AM Berlin time
    _schedule_daily_crawl()

    # Show next run time
    berlin_time = datetime.now(BERLIN_TZ)
    print(f"\nCurrent time (Berlin): {berlin_time.strftime(_TIME_FORMAT)}")
    next_run = schedule.next_run()
    if next_run:
        # next_run() is a naive datetime; convert it so the printed value is
        # in Berlin time and %Z is not empty.
        print(f"Next scheduled run: {_format_berlin(next_run)}")

    print("\n⏳ Scheduler is running... (Press Ctrl+C to stop)\n")

    # Run once on startup
    print("🚀 Running initial crawl on startup...\n")
    run_transport_crawler()

    # Keep running
    while True:
        schedule.run_pending()
        time.sleep(60)  # Check every minute


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n👋 Scheduler stopped by user")