transport
This commit is contained in:
@@ -8,3 +8,4 @@ selenium==4.15.2
|
|||||||
webdriver-manager==4.0.1
|
webdriver-manager==4.0.1
|
||||||
flask==3.0.0
|
flask==3.0.0
|
||||||
redis==5.0.1
|
redis==5.0.1
|
||||||
|
schedule==1.2.0
|
||||||
|
|||||||
71
transport_crawler/scheduled_crawler.py
Normal file
71
transport_crawler/scheduled_crawler.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Scheduled transport crawler that runs daily at 6 AM Berlin time
|
||||||
|
"""
|
||||||
|
import schedule
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
import pytz
|
||||||
|
from crawler_service import run_crawler
|
||||||
|
|
||||||
|
# Berlin timezone
|
||||||
|
BERLIN_TZ = pytz.timezone('Europe/Berlin')
|
||||||
|
|
||||||
|
def run_transport_crawler():
    """Execute one crawl and print a banner-framed report of its outcome.

    Calls ``run_crawler()`` and prints the ``total_disruptions``,
    ``mvg_disruptions`` and ``sbahn_disruptions`` entries of its result.
    Any ``Exception`` raised by the crawl or while reporting is caught and
    printed as a failure instead of propagating to the caller.
    """
    rule = '=' * 60  # horizontal separator used around every status banner
    started_at = datetime.now(BERLIN_TZ)
    stamp = started_at.strftime('%Y-%m-%d %H:%M:%S %Z')

    print(f"\n{rule}")
    print(f"🕐 Scheduled transport crawler started at {stamp}")
    print(f"{rule}\n")

    try:
        result = run_crawler()

        # Success report; the counts are looked up lazily, line by line, so a
        # missing key surfaces through the failure branch below.
        print(f"\n{rule}")
        print("✓ Scheduled transport crawler completed successfully")
        print(f" Total disruptions: {result['total_disruptions']}")
        print(f" MVG disruptions: {result['mvg_disruptions']}")
        print(f" S-Bahn disruptions: {result['sbahn_disruptions']}")
        print(f"{rule}\n")

    except Exception as e:
        # Top-level boundary for a scheduled job: report and swallow.
        print(f"\n{rule}")
        print(f"✗ Scheduled transport crawler failed: {e}")
        print(f"{rule}\n")
|
||||||
|
|
||||||
|
def main():
    """Run the scheduler: register the daily 06:00 Berlin crawl and loop.

    Prints a startup banner, schedules ``run_transport_crawler`` for 06:00
    Europe/Berlin every day, performs one crawl immediately, and then polls
    for pending jobs once per minute. The polling loop never exits on its
    own; the function only ends when an exception (for example
    ``KeyboardInterrupt``) propagates out of it.
    """
    print("🚇 Munich Transport Crawler Scheduler")
    print("="*60)
    print("Schedule: Daily at 6:00 AM Berlin time")
    print("Timezone: Europe/Berlin (CET/CEST)")
    print("="*60)

    # Pin the job to Berlin wall-clock time. Without a timezone argument,
    # at() interprets "06:00" in the host's local timezone, which is not
    # Berlin on a UTC-configured server or container.
    schedule.every().day.at("06:00", BERLIN_TZ).do(run_transport_crawler)

    # Show current time and next run time
    berlin_time = datetime.now(BERLIN_TZ)
    print(f"\nCurrent time (Berlin): {berlin_time.strftime('%Y-%m-%d %H:%M:%S %Z')}")

    next_run = schedule.next_run()
    if next_run:
        # schedule reports the next run as a naive datetime, for which %Z
        # formats as an empty string. astimezone() treats a naive value as
        # host-local time, so this converts it to Berlin time with a zone name.
        next_run_berlin = next_run.astimezone(BERLIN_TZ)
        print(f"Next scheduled run: {next_run_berlin.strftime('%Y-%m-%d %H:%M:%S %Z')}")

    print("\n⏳ Scheduler is running... (Press Ctrl+C to stop)\n")

    # Run once on startup so data is available before the first 06:00 slot
    print("🚀 Running initial crawl on startup...\n")
    run_transport_crawler()

    # Keep running
    while True:
        schedule.run_pending()
        time.sleep(60)  # Check every minute
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: run the scheduler until the process is interrupted.
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C is the advertised way to stop; exit with a farewell message
        # rather than a traceback.
        print("\n\n👋 Scheduler stopped by user")
|
||||||
@@ -1,8 +1,11 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# Start both the API server and the worker
|
# Start the scheduler, worker, and API server
|
||||||
|
|
||||||
# Start the worker in the background
|
# Start the worker in the background
|
||||||
python -u worker.py &
|
python -u worker.py &
|
||||||
|
|
||||||
# Start the API server in the foreground
|
# Start the API server in the background
|
||||||
python -u api_service.py
|
python -u api_service.py &
|
||||||
|
|
||||||
|
# Start the scheduler in the foreground
|
||||||
|
python -u scheduled_crawler.py
|
||||||
|
|||||||
Reference in New Issue
Block a user