update
This commit is contained in:
233
transport_crawler/crawler_service.py
Normal file
233
transport_crawler/crawler_service.py
Normal file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Transport Crawler Service - Main orchestrator
|
||||
Fetches disruptions from multiple sources and displays them
|
||||
"""
|
||||
from datetime import datetime
|
||||
from mvg_api_client import MVGClient
|
||||
from db_api_client import DBClient
|
||||
|
||||
def print_header():
    """Print the crawler banner together with the current local time."""
    rule = "=" * 70
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    banner = (
        "\n" + rule,
        "🚇 Munich Transport Disruption Crawler",
        rule,
        f"Time: {stamp}",
        rule,
    )
    for row in banner:
        print(row)
|
||||
|
||||
def print_disruption_summary(all_disruptions):
    """Print how many disruptions are active, grouped by type and by source.

    Args:
        all_disruptions: Sized collection of dict-like disruption records.
            Only the ``type`` and ``source`` keys are read here.

    Records whose ``type`` or ``source`` is missing, ``None`` or empty are
    counted under ``'unknown'``.
    """
    from collections import Counter

    if not all_disruptions:
        print("\n✅ No disruptions found - All lines operating normally!")
        return

    print(f"\n📊 SUMMARY: {len(all_disruptions)} Active Disruptions")
    print("="*70)

    type_icons = {
        'maintenance': '🔧',
        'disruption': '⚠️',
        'delay': '⏱️',
        'info': 'ℹ️',
    }

    # `or 'unknown'` (instead of a .get default) also covers keys that are
    # present but None; str() keeps the keys mutually sortable and guarantees
    # that .title() below is available.
    by_type = Counter(str(d.get('type') or 'unknown') for d in all_disruptions)
    by_source = Counter(str(d.get('source') or 'unknown') for d in all_disruptions)

    print("\nBy Type:")
    for dtype, count in sorted(by_type.items()):
        icon = type_icons.get(dtype, '❓')
        print(f" {icon} {dtype.title()}: {count}")

    print("\nBy Source:")
    for source, count in sorted(by_source.items()):
        print(f" • {source}: {count}")
|
||||
|
||||
def print_disruptions(disruptions, title):
    """Print each disruption as a small formatted block under a heading.

    Nothing is printed when ``disruptions`` is empty.

    Args:
        disruptions: Sequence of dict-like disruption records. The keys read
            are ``type``, ``title``, ``lines``, ``start_time``, ``end_time``,
            ``description`` and ``severity``; all of them are optional.
        title: Heading printed above the list.

    ``start_time`` / ``end_time`` values must provide ``strftime`` when set
    (presumably ``datetime`` objects - verify against the API clients).
    """
    if not disruptions:
        return

    type_icons = {
        'maintenance': '🔧',
        'disruption': '⚠️',
        'delay': '⏱️',
        'info': 'ℹ️',
    }
    severity_icons = {
        'high': '🔴',
        'medium': '🟡',
        'low': '🟢',
    }
    max_description_length = 150

    print(f"\n{title}")
    print("-"*70)

    for i, d in enumerate(disruptions, 1):
        # Icon based on type
        icon = type_icons.get(d.get('type', 'info'), '❓')
        print(f"\n{icon} [{i}] {d.get('title', 'No title')}")

        # Lines affected
        lines = d.get('lines', [])
        if isinstance(lines, str):
            # A bare string would otherwise be joined character by character.
            lines = [lines]
        if lines:
            # str() each entry: identifiers such as bus numbers may be ints.
            line_str = ', '.join(str(line) for line in lines)
            print(f" 🚇 Lines: {line_str}")

        # Time range
        start = d.get('start_time')
        end = d.get('end_time')
        time_parts = []
        if start:
            time_parts.append(f"From: {start.strftime('%d.%m %H:%M')}")
        if end:
            time_parts.append(f"Until: {end.strftime('%d.%m %H:%M')}")
        if time_parts:
            print(f" ⏰ {' → '.join(time_parts)}")

        # Description, truncated so one alert cannot flood the console
        desc = d.get('description', '')
        if desc:
            if len(desc) > max_description_length:
                desc = desc[:max_description_length] + "..."
            print(f" 📝 {desc}")

        # Severity
        severity = d.get('severity', 'medium')
        severity_icon = severity_icons.get(severity, '⚪')
        print(f" {severity_icon} Severity: {severity}")
|
||||
|
||||
def save_to_mongodb(disruptions):
    """Upsert the current disruptions into MongoDB and deactivate stale alerts.

    Each disruption is upserted into ``munich_news.transport_alerts`` keyed by
    ``alert_id`` and flagged ``is_active=True``. Afterwards every alert whose
    ``alert_id`` was not part of this run is flagged ``is_active=False``.

    Args:
        disruptions: Iterable of dict-like disruption records. Records
            without an ``id`` are skipped because they cannot be upserted
            under a unique key.

    Returns:
        True when the save completed, False when any error occurred (the
        error is printed, not raised).
    """
    try:
        import os
        from datetime import timezone

        from pymongo import MongoClient

        # NOTE(review): the fallback URI embeds default credentials; set
        # MONGODB_URI in every real deployment.
        mongo_uri = os.getenv('MONGODB_URI', 'mongodb://admin:changeme@mongodb:27017/')

        # The context manager closes the client's connections on every exit
        # path; the original never closed the client.
        with MongoClient(mongo_uri) as client:
            collection = client['munich_news']['transport_alerts']

            print("\n💾 Saving to MongoDB...")

            # Timezone-aware replacement for the deprecated datetime.utcnow().
            now = datetime.now(timezone.utc)

            # Insert or update current disruptions
            active_ids = []
            for d in disruptions:
                alert_id = d.get('id')
                if alert_id is None:
                    print(f"⚠ Skipping disruption without id: {d.get('title', 'No title')}")
                    continue

                collection.update_one(
                    {'alert_id': alert_id},
                    {
                        '$set': {
                            'alert_id': alert_id,
                            'title': d.get('title'),
                            'description': d.get('description'),
                            'lines': d.get('lines', []),
                            'type': d.get('type'),
                            'severity': d.get('severity'),
                            'start_time': d.get('start_time'),
                            'end_time': d.get('end_time'),
                            'source': d.get('source'),
                            'is_active': True,
                            'updated_at': now,
                        }
                    },
                    upsert=True,
                )
                active_ids.append(alert_id)

            # Deactivate stale alerts only after the upserts succeeded, so a
            # failure part-way through cannot leave every alert inactive.
            collection.update_many(
                {'alert_id': {'$nin': active_ids}},
                {'$set': {'is_active': False}},
            )

        print(f"✓ Saved {len(active_ids)} disruptions to MongoDB")
        return True

    except Exception as e:
        # Deliberate best-effort boundary: persistence problems (including a
        # missing pymongo install) must not abort the crawler run.
        print(f"✗ MongoDB error: {e}")
        return False
|
||||
|
||||
|
||||
def run_crawler():
    """Run one crawl: fetch, report, dump JSON, persist.

    Fetches MVG and S-Bahn disruptions, prints the summary and the detailed
    listings, prints the combined result as JSON, hands the records to
    save_to_mongodb() and returns the JSON-ready dict.
    """
    import json

    def _iso(moment):
        # Timestamps are optional; falsy values are emitted as null.
        return moment.isoformat() if moment else None

    rule = "="*70

    print_header()

    # 1. Fetch MVG disruptions (U-Bahn, Tram, Bus)
    print("\n📡 Fetching data from sources...")
    print("-"*70)

    mvg_disruptions = MVGClient().get_disruptions()
    all_disruptions = list(mvg_disruptions)

    # 2. Fetch S-Bahn disruptions
    sbahn_disruptions = DBClient().get_sbahn_disruptions()
    all_disruptions += sbahn_disruptions

    # 3. Print summary
    print_disruption_summary(all_disruptions)

    # 4. Print detailed disruptions
    sections = (
        (mvg_disruptions, "\n🚇 MVG DISRUPTIONS (U-Bahn, Tram, Bus)"),
        (sbahn_disruptions, "\n🚆 S-BAHN DISRUPTIONS"),
    )
    for records, heading in sections:
        if records:
            print_disruptions(records, heading)

    # 5. Output JSON
    print("\n" + rule)
    print("📄 JSON OUTPUT")
    print(rule)

    output = {
        'timestamp': datetime.now().isoformat(),
        'total_disruptions': len(all_disruptions),
        'mvg_disruptions': len(mvg_disruptions),
        'sbahn_disruptions': len(sbahn_disruptions),
        'disruptions': [
            {
                'id': record.get('id'),
                'title': record.get('title'),
                'description': record.get('description'),
                'lines': record.get('lines', []),
                'type': record.get('type'),
                'severity': record.get('severity'),
                'start_time': _iso(record.get('start_time')),
                'end_time': _iso(record.get('end_time')),
                'source': record.get('source'),
            }
            for record in all_disruptions
        ],
    }

    print(json.dumps(output, indent=2, ensure_ascii=False))

    # 6. Save to MongoDB
    save_to_mongodb(all_disruptions)

    # Footer
    print("\n" + rule)
    print("✓ Crawler finished")
    print(rule + "\n")

    return output
|
||||
|
||||
# Script entry point: run a single crawl and report failures to the caller.
if __name__ == '__main__':
    try:
        run_crawler()
    except KeyboardInterrupt:
        print("\n\n👋 Crawler stopped by user")
    except Exception as e:
        print(f"\n\n❌ Crawler error: {e}")
        import traceback
        traceback.print_exc()
        # Exit non-zero so schedulers and containers can see that the crawl
        # failed; previously a crashed run still exited with status 0.
        raise SystemExit(1)
|
||||
Reference in New Issue
Block a user