#!/usr/bin/env python3
"""
Transport Crawler Service - Main orchestrator
Fetches disruptions from multiple sources and displays them
"""

import json
import os
import sys
import traceback
from collections import Counter
from datetime import datetime, timezone

from mvg_api_client import MVGClient
from db_api_client import DBClient

# Horizontal rules used to frame console sections.
_HEAVY_RULE = "=" * 70
_LIGHT_RULE = "-" * 70

# Console icons keyed by the disruption's 'type' / 'severity' value.
# Kept at module level so both printers share one table and it is not
# rebuilt for every disruption.
_TYPE_ICONS = {
    'maintenance': '🔧',
    'disruption': '⚠️',
    'delay': '⏱️',
    'info': 'ℹ️',
}
_UNKNOWN_TYPE_ICON = '❓'

_SEVERITY_ICONS = {
    'high': '🔴',
    'medium': '🟡',
    'low': '🟢',
}
_UNKNOWN_SEVERITY_ICON = '⚪'

# Fallback connection string used when MONGODB_URI is not set.
# NOTE(review): this embeds default credentials; deployments should always
# provide MONGODB_URI explicitly.
_DEFAULT_MONGODB_URI = 'mongodb://admin:changeme@mongodb:27017/'

# Descriptions longer than this many characters are truncated on the console.
_MAX_DESCRIPTION_LENGTH = 150


def print_header():
    """Print the banner with the current local time."""
    print("\n" + _HEAVY_RULE)
    print("🚇 Munich Transport Disruption Crawler")
    print(_HEAVY_RULE)
    print(f"Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(_HEAVY_RULE)


def print_disruption_summary(all_disruptions):
    """Print the total number of disruptions and counts per type and source.

    Args:
        all_disruptions: Sequence of disruption dicts. Only the 'type' and
            'source' keys are read; a missing or None value is counted
            under 'unknown'.
    """
    if not all_disruptions:
        print("\n✅ No disruptions found - All lines operating normally!")
        return

    print(f"\n📊 SUMMARY: {len(all_disruptions)} Active Disruptions")
    print(_HEAVY_RULE)

    # `or 'unknown'` also covers keys that are present but set to None,
    # which would otherwise break sorting and `.title()` below.
    by_type = Counter(d.get('type') or 'unknown' for d in all_disruptions)
    print("\nBy Type:")
    for dtype, count in sorted(by_type.items()):
        icon = _TYPE_ICONS.get(dtype, _UNKNOWN_TYPE_ICON)
        print(f" {icon} {dtype.title()}: {count}")

    by_source = Counter(d.get('source') or 'unknown' for d in all_disruptions)
    print("\nBy Source:")
    for source, count in sorted(by_source.items()):
        print(f" • {source}: {count}")


def _format_time_range(start, end):
    """Return 'From: … → Until: …' for whichever bounds are set.

    Both arguments are expected to offer ``strftime`` (e.g. ``datetime``);
    a falsy bound is omitted. Returns '' when neither bound is set.
    """
    parts = []
    if start:
        parts.append(f"From: {start.strftime('%d.%m %H:%M')}")
    if end:
        parts.append(f"Until: {end.strftime('%d.%m %H:%M')}")
    return " → ".join(parts)


def print_disruptions(disruptions, title):
    """Print each disruption under *title* in a human-readable layout.

    Nothing is printed when *disruptions* is empty.

    Args:
        disruptions: Sequence of disruption dicts. The keys read are
            'type', 'title', 'lines', 'start_time', 'end_time',
            'description' and 'severity'; all are optional.
        title: Section heading printed above the list.
    """
    if not disruptions:
        return

    print(f"\n{title}")
    print(_LIGHT_RULE)

    for index, d in enumerate(disruptions, 1):
        icon = _TYPE_ICONS.get(d.get('type', 'info'), _UNKNOWN_TYPE_ICON)
        print(f"\n{icon} [{index}] {d.get('title', 'No title')}")

        # Lines affected
        lines = d.get('lines') or []
        if lines:
            # map(str, …) keeps the join safe if a line id is not a string.
            print(f" 🚇 Lines: {', '.join(map(str, lines))}")

        # Time range
        time_str = _format_time_range(d.get('start_time'), d.get('end_time'))
        if time_str:
            print(f" ⏰ {time_str}")

        # Description, truncated so one alert cannot flood the console
        desc = d.get('description') or ''
        if desc:
            if len(desc) > _MAX_DESCRIPTION_LENGTH:
                desc = desc[:_MAX_DESCRIPTION_LENGTH] + "..."
            print(f" 📝 {desc}")

        # Severity
        severity = d.get('severity') or 'medium'
        severity_icon = _SEVERITY_ICONS.get(severity, _UNKNOWN_SEVERITY_ICON)
        print(f" {severity_icon} Severity: {severity}")


def save_to_mongodb(disruptions):
    """Upsert *disruptions* into MongoDB and deactivate all other alerts.

    Each disruption is written to ``munich_news.transport_alerts`` keyed by
    ``alert_id`` (the disruption's 'id'). Afterwards every stored alert whose
    ``alert_id`` is not part of this batch is flagged ``is_active = False``.
    Deactivation happens last so that a failure while writing does not leave
    the whole collection marked inactive.

    The connection string is read from the ``MONGODB_URI`` environment
    variable, falling back to a built-in default.

    Args:
        disruptions: Iterable of disruption dicts. Entries without an 'id'
            are skipped because they cannot be keyed.

    Returns:
        True when the save completed, False when any error occurred
        (including ``pymongo`` not being installed). Errors are printed,
        never raised, so persistence stays best-effort.
    """
    try:
        # Imported lazily so a missing driver is reported as a save failure
        # instead of preventing the crawler from running at all.
        from pymongo import MongoClient

        mongo_uri = os.getenv('MONGODB_URI', _DEFAULT_MONGODB_URI)

        # The context manager closes the client's connections on exit.
        with MongoClient(mongo_uri) as client:
            collection = client['munich_news']['transport_alerts']

            print("\n💾 Saving to MongoDB...")

            # Timezone-aware replacement for the deprecated utcnow();
            # PyMongo stores aware datetimes as the same UTC instant.
            now = datetime.now(timezone.utc)

            active_ids = []
            for d in disruptions:
                alert_id = d.get('id')
                if alert_id is None:
                    print(
                        " ⚠️ Skipping disruption without id: "
                        f"{d.get('title', 'No title')}"
                    )
                    continue

                collection.update_one(
                    {'alert_id': alert_id},
                    {
                        '$set': {
                            'alert_id': alert_id,
                            'title': d.get('title'),
                            'description': d.get('description'),
                            'lines': d.get('lines', []),
                            'type': d.get('type'),
                            'severity': d.get('severity'),
                            'start_time': d.get('start_time'),
                            'end_time': d.get('end_time'),
                            'source': d.get('source'),
                            'is_active': True,
                            'updated_at': now,
                        }
                    },
                    upsert=True,
                )
                active_ids.append(alert_id)

            # $nin also matches documents that have no alert_id field, so
            # everything outside the current batch ends up inactive.
            collection.update_many(
                {'alert_id': {'$nin': active_ids}},
                {'$set': {'is_active': False}},
            )

        print(f"✓ Saved {len(active_ids)} disruptions to MongoDB")
        return True

    except Exception as e:
        print(f"✗ MongoDB error: {e}")
        return False


def _isoformat_or_none(value):
    """Return ``value.isoformat()``, or None when *value* is falsy."""
    return value.isoformat() if value else None


def _serialize_disruption(d):
    """Return a JSON-serialisable copy of the fields reported for *d*."""
    return {
        'id': d.get('id'),
        'title': d.get('title'),
        'description': d.get('description'),
        'lines': d.get('lines', []),
        'type': d.get('type'),
        'severity': d.get('severity'),
        'start_time': _isoformat_or_none(d.get('start_time')),
        'end_time': _isoformat_or_none(d.get('end_time')),
        'source': d.get('source'),
    }


def run_crawler():
    """Fetch disruptions from all sources, print them and persist them.

    Steps: print the banner, fetch from ``MVGClient.get_disruptions()`` and
    ``DBClient.get_sbahn_disruptions()``, print a summary and the detailed
    lists, dump the combined result as JSON to stdout, then save it via
    ``save_to_mongodb``.

    Returns:
        The dict that was printed as JSON, with keys 'timestamp',
        'total_disruptions', 'mvg_disruptions', 'sbahn_disruptions' and
        'disruptions'.
    """
    print_header()

    all_disruptions = []

    # 1. Fetch MVG disruptions (U-Bahn, Tram, Bus)
    print("\n📡 Fetching data from sources...")
    print(_LIGHT_RULE)

    mvg_client = MVGClient()
    mvg_disruptions = mvg_client.get_disruptions()
    all_disruptions.extend(mvg_disruptions)

    # 2. Fetch S-Bahn disruptions
    db_client = DBClient()
    sbahn_disruptions = db_client.get_sbahn_disruptions()
    all_disruptions.extend(sbahn_disruptions)

    # 3. Print summary
    print_disruption_summary(all_disruptions)

    # 4. Print detailed disruptions
    if mvg_disruptions:
        print_disruptions(
            mvg_disruptions, "\n🚇 MVG DISRUPTIONS (U-Bahn, Tram, Bus)"
        )

    if sbahn_disruptions:
        print_disruptions(sbahn_disruptions, "\n🚆 S-BAHN DISRUPTIONS")

    # 5. Output JSON
    print("\n" + _HEAVY_RULE)
    print("📄 JSON OUTPUT")
    print(_HEAVY_RULE)

    output = {
        'timestamp': datetime.now().isoformat(),
        'total_disruptions': len(all_disruptions),
        'mvg_disruptions': len(mvg_disruptions),
        'sbahn_disruptions': len(sbahn_disruptions),
        'disruptions': [_serialize_disruption(d) for d in all_disruptions],
    }

    print(json.dumps(output, indent=2, ensure_ascii=False))

    # 6. Save to MongoDB (best-effort; failures are reported, not raised)
    save_to_mongodb(all_disruptions)

    # Footer
    print("\n" + _HEAVY_RULE)
    print("✓ Crawler finished")
    print(_HEAVY_RULE + "\n")

    return output


if __name__ == '__main__':
    try:
        run_crawler()
    except KeyboardInterrupt:
        print("\n\n👋 Crawler stopped by user")
    except Exception as e:
        print(f"\n\n❌ Crawler error: {e}")
        traceback.print_exc()
        # Signal the failure to whatever scheduled the crawler.
        sys.exit(1)