Files
Munich-news/news_crawler/scheduled_crawler.py
2025-12-10 15:50:11 +00:00

76 lines
2.3 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Scheduled crawler that runs once on startup and then every 3 hours (times are displayed in Berlin time)
"""
import schedule
import time
from datetime import datetime
import pytz
from crawler_service import crawl_all_feeds
# Berlin time zone, used for the timestamps this script prints
BERLIN_TZ = pytz.timezone('Europe/Berlin')
def run_crawler():
    """Execute one crawl of all feeds and print a start/finish report.

    Prints a banner stamped with the current Berlin time, calls
    ``crawl_all_feeds`` limited to 20 articles per feed, then prints the
    ``'total_articles_crawled'`` and ``'duration_seconds'`` entries of the
    returned result. Any ``Exception`` raised along the way is reported on
    stdout and not re-raised.
    """
    rule = '=' * 60
    started_at = datetime.now(BERLIN_TZ)
    stamp = started_at.strftime('%Y-%m-%d %H:%M:%S %Z')

    print(f"\n{rule}")
    print(f"🕐 Scheduled crawler started at {stamp}")
    print(f"{rule}\n")

    try:
        # Cap the crawl at 20 articles per feed.
        summary = crawl_all_feeds(max_articles_per_feed=20)

        # The report lines stay inside the try block so that a result
        # missing an expected key is reported through the failure path.
        print(f"\n{rule}")
        print("✓ Scheduled crawler completed successfully")
        print(f" Articles crawled: {summary['total_articles_crawled']}")
        print(f" Duration: {summary['duration_seconds']}s")
        print(f"{rule}\n")
    except Exception as exc:
        print(f"\n{rule}")
        print(f"✗ Scheduled crawler failed: {exc}")
        print(f"{rule}\n")
def main():
    """Print the scheduler banner, register the crawl job, and loop forever.

    Registers ``run_crawler`` with ``schedule`` to fire every 3 hours,
    prints the current time and the next scheduled run in Berlin time,
    performs one crawl immediately, and then calls
    ``schedule.run_pending()`` once every 60 seconds. The loop has no exit
    condition of its own.
    """
    rule = "=" * 60
    time_format = '%Y-%m-%d %H:%M:%S %Z'

    header_lines = (
        "🤖 Munich News Crawler Scheduler",
        rule,
        "Schedule: Every 3 hours",
        "Timezone: Europe/Berlin (CET/CEST)",
        rule,
    )
    for line in header_lines:
        print(line)

    # Register the recurring job first so the next run time can be reported.
    schedule.every(3).hours.do(run_crawler)

    now_berlin = datetime.now(BERLIN_TZ)
    print(f"\nCurrent time (Berlin): {now_berlin.strftime(time_format)}")

    upcoming = schedule.next_run()
    if upcoming:
        # NOTE(review): astimezone() on a naive datetime assumes system
        # local time - confirm that is what schedule.next_run() returns.
        upcoming_berlin = upcoming.astimezone(BERLIN_TZ)
        print(f"Next scheduled run: {upcoming_berlin.strftime(time_format)}")

    print("\n⏳ Scheduler is running... (Press Ctrl+C to stop)\n")

    # Crawl once right away instead of waiting for the first scheduled run
    # (optional - remove the call below to disable this).
    print("🚀 Running initial crawl on startup...")
    run_crawler()

    # Poll for due jobs once a minute.
    while True:
        schedule.run_pending()
        time.sleep(60)
if __name__ == '__main__':
    # Script entry point: run the scheduler until it is interrupted or fails.
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C is the advertised way to stop the scheduler, so this is a
        # clean exit (status 0).
        print("\n\n👋 Scheduler stopped by user")
    except Exception as e:
        print(f"\n\n✗ Scheduler error: {e}")
        # Exit with a non-zero status so a process supervisor can tell an
        # unexpected failure apart from a deliberate stop.
        raise SystemExit(1)