This commit is contained in:
2025-11-12 22:33:56 +01:00
parent 95669fd211
commit 45df834d5b
12 changed files with 1172 additions and 240 deletions

56
fix-categories.js Normal file
View File

@@ -0,0 +1,56 @@
// MongoDB (mongosh) script to fix article categories.
// Run with: docker exec -i munich-news-mongodb mongosh -u admin -p changeme --authenticationDatabase admin < fix-categories.js

// Select the target database with plain JavaScript rather than the `use`
// shell command, so the script does not rely on command-helper parsing.
db = db.getSiblingDB("munich_news");

// List every RSS feed together with its configured category. Only `name` and
// `category` are projected because nothing else is printed.
print("=== RSS Feeds and their categories ===");
db.rss_feeds.find({}, { name: 1, category: 1, _id: 0 }).forEach((feed) => {
  // A missing or empty category is shown as a placeholder.
  print(`${feed.name}: ${feed.category || 'NO CATEGORY'}`);
});
// Report how many articles each category holds before anything is changed,
// largest group first.
print("\n=== Current article category distribution ===");
const categoryCountsBefore = db.articles.aggregate([
  { $group: { _id: "$category", count: { $sum: 1 } } },
  { $sort: { count: -1 } },
]);
categoryCountsBefore.forEach(({ _id, count }) => {
  // A falsy group key (e.g. no category) is labelled 'null' in the report.
  const label = _id || 'null';
  print(`${label}: ${count} articles`);
});
print("\n=== Fixing null categories ===");

// Pass 1: give uncategorised articles the category of the RSS feed they came
// from. An article is linked to a feed when its `source` equals the feed `name`.
// The counter tracks ARTICLES modified, summed over all feeds.
let articlesUpdatedFromFeeds = 0;
db.rss_feeds.find({}, { name: 1, category: 1, _id: 0 }).forEach((feed) => {
  // Skip feeds that cannot contribute: no category to copy, or no name to
  // match on (an absent name must never be used as a filter value).
  if (!feed.category || !feed.name) {
    return;
  }
  // `category: null` selects articles whose category is null or not set.
  const result = db.articles.updateMany(
    { source: feed.name, category: null },
    { $set: { category: feed.category } }
  );
  if (result.modifiedCount > 0) {
    print(`Updated ${result.modifiedCount} articles from ${feed.name} to category: ${feed.category}`);
    articlesUpdatedFromFeeds += result.modifiedCount;
  }
});

// Pass 2: whatever is still uncategorised falls back to 'general'.
const remainingNull = db.articles.updateMany(
  { category: null },
  { $set: { category: "general" } }
);
if (remainingNull.modifiedCount > 0) {
  print(`Set ${remainingNull.modifiedCount} remaining null articles to 'general'`);
}

// Grand total across both passes.
print(`\nTotal articles updated: ${articlesUpdatedFromFeeds + remainingNull.modifiedCount}`);
// Print the per-category article counts again, largest group first, so the
// effect of the updates is visible, then signal completion.
print("\n=== Updated article category distribution ===");
const categoryCountsAfter = db.articles.aggregate([
  { $group: { _id: "$category", count: { $sum: 1 } } },
  { $sort: { count: -1 } },
]);
categoryCountsAfter.forEach(({ _id, count }) => {
  // A falsy group key is labelled 'null' in the report.
  const label = _id || 'null';
  print(`${label}: ${count} articles`);
});
print("\n✓ Done!");