This commit is contained in:
2025-11-27 12:15:40 +01:00
parent 0d73796900
commit 1886c8c010
4 changed files with 116 additions and 29 deletions

View File

@@ -1,12 +1,87 @@
use anyhow::Result;
use osmpbf::{Element, ElementReader};
use scylla::{Session, SessionBuilder};
use scylla::SessionBuilder;
use std::collections::HashMap;
use tokio::task::JoinSet;
use std::fs::{File, OpenOptions};
use std::io::{BufWriter, Write, Seek, SeekFrom};
use std::path::{Path, PathBuf};
use memmap2::Mmap;
const ZOOM_LEVELS: [u32; 4] = [6, 9, 12, 14];
struct NodeStore {
writer: Option<BufWriter<File>>,
mmap: Option<Mmap>,
path: PathBuf,
last_id: i64,
}
impl NodeStore {
fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
let path = path.as_ref().to_path_buf();
let file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.truncate(true)
.open(&path)?;
let writer = BufWriter::with_capacity(10 * 1024 * 1024, file); // 10MB buffer
Ok(Self {
writer: Some(writer),
mmap: None,
path,
last_id: -1,
})
}
fn insert(&mut self, id: i64, lat: f64, lon: f64) -> Result<()> {
if let Some(writer) = &mut self.writer {
if id > self.last_id + 1 {
let gap = id - self.last_id - 1;
writer.seek(SeekFrom::Current(gap * 8))?;
} else if id <= self.last_id {
writer.seek(SeekFrom::Start(id as u64 * 8))?;
}
let lat_i32 = (lat * 1e7) as i32;
let lon_i32 = (lon * 1e7) as i32;
writer.write_all(&lat_i32.to_le_bytes())?;
writer.write_all(&lon_i32.to_le_bytes())?;
self.last_id = id;
}
Ok(())
}
fn prepare_for_reading(&mut self) -> Result<()> {
self.writer = None; // Flush and close writer
let file = File::open(&self.path)?;
let mmap = unsafe { Mmap::map(&file)? };
self.mmap = Some(mmap);
Ok(())
}
fn get(&self, id: i64) -> Option<(f64, f64)> {
if let Some(mmap) = &self.mmap {
let offset = id as usize * 8;
if offset + 8 <= mmap.len() {
let chunk = &mmap[offset..offset+8];
let lat_i32 = i32::from_le_bytes(chunk[0..4].try_into().unwrap());
let lon_i32 = i32::from_le_bytes(chunk[4..8].try_into().unwrap());
if lat_i32 == 0 && lon_i32 == 0 { return None; }
return Some((lat_i32 as f64 / 1e7, lon_i32 as f64 / 1e7));
}
}
None
}
}
fn should_include(tags: &HashMap<String, String>, zoom: u32) -> bool {
if zoom >= 14 { return true; }
@@ -80,11 +155,11 @@ async fn main() -> Result<()> {
let reader = ElementReader::from_path(path)?;
// Cache for node coordinates: ID -> (lat, lon)
// Use sled for disk-based caching to avoid OOM, limit cache to 512MB
let node_cache = sled::Config::new()
.path("node_cache")
.cache_capacity(512 * 1024 * 1024)
.open()?;
// Use flat file with mmap
let cache_dir = std::env::var("CACHE_DIR").unwrap_or_else(|_| ".".to_string());
let cache_path = std::path::Path::new(&cache_dir).join("node_cache.bin");
println!("Using node cache at {:?}", cache_path);
let mut node_store = NodeStore::new(cache_path.clone())?;
// Channel for backpressure
// Producer (reader) -> Consumer (writer)
@@ -147,19 +222,17 @@ async fn main() -> Result<()> {
let tx = tx_clone;
let mut node_count = 0;
let mut way_count = 0;
let mut ways_pending = false;
// We process sequentially: Nodes first, then Ways.
// osmpbf yields nodes then ways.
// We need to detect when we switch from nodes to ways to prepare the store.
reader.for_each(|element| {
match element {
Element::Node(node) => {
node_count += 1;
// Store in sled: key=id (8 bytes), value=lat+lon (16 bytes)
let id_bytes = node.id().to_be_bytes();
let mut coords = [0u8; 16];
coords[0..8].copy_from_slice(&node.lat().to_be_bytes());
coords[8..16].copy_from_slice(&node.lon().to_be_bytes());
let _ = node_cache.insert(id_bytes, &coords);
let _ = node_store.insert(node.id(), node.lat(), node.lon());
if node.tags().count() > 0 {
let id = node.id();
@@ -178,13 +251,7 @@ async fn main() -> Result<()> {
}
Element::DenseNode(node) => {
node_count += 1;
// Store in sled
let id_bytes = node.id().to_be_bytes();
let mut coords = [0u8; 16];
coords[0..8].copy_from_slice(&node.lat().to_be_bytes());
coords[8..16].copy_from_slice(&node.lon().to_be_bytes());
let _ = node_cache.insert(id_bytes, &coords);
let _ = node_store.insert(node.id(), node.lat(), node.lon());
if node.tags().count() > 0 {
let id = node.id();
@@ -202,6 +269,16 @@ async fn main() -> Result<()> {
}
}
Element::Way(way) => {
if !ways_pending {
// First way encountered. Prepare store for reading.
println!("Switching to Way processing. Flushing node cache...");
if let Err(e) = node_store.prepare_for_reading() {
eprintln!("Failed to prepare node store: {}", e);
return;
}
ways_pending = true;
}
way_count += 1;
let tags: HashMap<String, String> = way.tags().map(|(k, v)| (k.to_string(), v.to_string())).collect();
@@ -219,12 +296,9 @@ async fn main() -> Result<()> {
if is_highway || is_building || is_water || is_landuse || is_railway {
let mut points = Vec::new();
// Resolve nodes from sled
// Resolve nodes from store
for node_id in way.refs() {
let id_bytes = node_id.to_be_bytes();
if let Ok(Some(coords_bytes)) = node_cache.get(id_bytes) {
let lat = f64::from_be_bytes(coords_bytes[0..8].try_into().unwrap());
let lon = f64::from_be_bytes(coords_bytes[8..16].try_into().unwrap());
if let Some((lat, lon)) = node_store.get(node_id) {
points.push((lat, lon));
}
}
@@ -298,7 +372,7 @@ async fn main() -> Result<()> {
consumer_handle.await?;
// Clean up cache
let _ = std::fs::remove_dir_all("node_cache");
let _ = std::fs::remove_file(cache_path);
println!("Done!");
Ok(())