move station post processor inline with api gets

This commit is contained in:
Matt Walsh
2025-08-15 14:59:16 -05:00
parent ea58b5a9c8
commit c05b827593
5 changed files with 34748 additions and 18596 deletions

12
.vscode/launch.json vendored
View File

@@ -26,6 +26,18 @@
"skipFiles": [ "skipFiles": [
"<node_internals>/**" "<node_internals>/**"
], ],
"args": [
"--use-cache"
],
"type": "node"
},
{
"name": "Data:stations-api",
"program": "${workspaceFolder}/datagenerators/stations.mjs",
"request": "launch",
"skipFiles": [
"<node_internals>/**"
],
"type": "node" "type": "node"
}, },
{ {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -2,6 +2,8 @@
import { readFileSync, writeFileSync } from 'fs'; import { readFileSync, writeFileSync } from 'fs';
import * as url from 'node:url';
// Load station data // Load station data
const stationInfo = JSON.parse(readFileSync('./datagenerators/output/stations-raw.json', 'utf8')); const stationInfo = JSON.parse(readFileSync('./datagenerators/output/stations-raw.json', 'utf8'));
// const regionalCities = JSON.parse(readFileSync('./datagenerators/output/regionalcities.json', 'utf8')); // const regionalCities = JSON.parse(readFileSync('./datagenerators/output/regionalcities.json', 'utf8'));
@@ -1109,139 +1111,184 @@ or where the fallback to the ICAO airport code occurred:
jq -c '.[] | select(.name | test("^[A-Z]{3}$")) | {state, city, simple, name}' jq -c '.[] | select(.name | test("^[A-Z]{3}$")) | {state, city, simple, name}'
*/ */
const diffMode = process.argv.includes('--diff'); const readArguments = () => ({
const onlyProblems = process.argv.includes('--only-problems'); diffMode: process.argv.includes('--diff'),
const noProblems = process.argv.includes('--no-problems'); onlyProblems: process.argv.includes('--only-problems'),
const onlyDuplicates = process.argv.includes('--only-dupes'); noProblems: process.argv.includes('--no-problems'),
const noPriority = process.argv.includes('--no-priority'); onlyDuplicates: process.argv.includes('--only-dupes'),
const noSimple = process.argv.includes('--no-simple'); noPriority: process.argv.includes('--no-priority'),
const noCoordinates = process.argv.includes('--no-coords'); noSimple: process.argv.includes('--no-simple'),
const writeFile = process.argv.includes('--write'); noCoordinates: process.argv.includes('--no-coords'),
writeFile: process.argv.includes('--write'),
});
// Process ALL stations at once to get the display name map const DEFAULT_OPTIONS = {
let displayNameMap = processAllStations(stationInfo); diffMode: false,
onlyProblems: false,
noProblems: false,
onlyDuplicates: false,
noPriority: false,
noSimple: false,
noCoordinates: false,
writeFile: false,
};
// Apply priority-based deduplication const postProcessor = (_options) => {
displayNameMap = resolveDuplicatesByPriority(displayNameMap, stationInfo); // combine default and provided options
const options = { ...DEFAULT_OPTIONS, ..._options };
const results = []; // Process ALL stations at once to get the display name map
let displayNameMap = processAllStations(stationInfo);
// Now iterate through stations and use the pre-computed display names // Apply priority-based deduplication
const stations = Object.values(stationInfo); displayNameMap = resolveDuplicatesByPriority(displayNameMap, stationInfo);
stations.forEach((station) => {
const originalName = station.city;
const processedName = processingUtils.finalCleanup(displayNameMap[station.id]); // Look up by station ID
// Get airport type and priority for this station const results = [];
const airportType = getAirportType(originalName, station.id); // Pass station ID for enhanced detection
const priority = getAirportPriority(airportType);
const potentialIssues = []; // Now iterate through stations and use the pre-computed display names
// Check if the processed name contains punctuation (a period at the end is OK) const stations = Object.values(stationInfo);
if (/[,;!?/:.]/.test(processedName) && !processedName.endsWith('.')) { stations.forEach((station) => {
potentialIssues.push('punctuation'); const originalName = station.city;
} const processedName = processingUtils.finalCleanup(displayNameMap[station.id]); // Look up by station ID
if (processedName.length > 12) {
potentialIssues.push('long');
}
if (processedName.length > 20) {
potentialIssues.push('reallyLong');
}
// check if it contains any digits
if (/\d/.test(processedName)) {
potentialIssues.push('digits');
}
results.push({ // Get airport type and priority for this station
id: station.id, const airportType = getAirportType(originalName, station.id); // Pass station ID for enhanced detection
lat: station.lat, const priority = getAirportPriority(airportType);
lon: station.lon,
state: station.state, const potentialIssues = [];
location: originalName, // original full location name // Check if the processed name contains punctuation (a period at the end is OK)
city: processedName, // processed city name for display if (/[,;!?/:.]/.test(processedName) && !processedName.endsWith('.')) {
simple: originalName.match(/[^,/;\\-]*/)[0].substr(0, 12).trim(), potentialIssues.push('punctuation');
type: airportType, }
priority, if (processedName.length > 12) {
potentialIssues, potentialIssues.push('long');
}
if (processedName.length > 20) {
potentialIssues.push('reallyLong');
}
// check if it contains any digits
if (/\d/.test(processedName)) {
potentialIssues.push('digits');
}
results.push({
id: station.id,
lat: station.lat,
lon: station.lon,
state: station.state,
location: originalName, // original full location name
city: processedName, // processed city name for display
simple: originalName.match(/[^,/;\\-]*/)[0].substr(0, 12).trim(),
type: airportType,
priority,
potentialIssues,
});
}); });
});
// Check for duplicates by state // Check for duplicates by state
const cleanedMapByState = new Map(); const cleanedMapByState = new Map();
results.forEach((result) => { results.forEach((result) => {
const { state } = result; const { state } = result;
if (!cleanedMapByState.has(state)) { if (!cleanedMapByState.has(state)) {
cleanedMapByState.set(state, new Map()); cleanedMapByState.set(state, new Map());
} }
const stateMap = cleanedMapByState.get(state); const stateMap = cleanedMapByState.get(state);
if (stateMap.has(result.city)) { if (stateMap.has(result.city)) {
stateMap.get(result.city).push(result); stateMap.get(result.city).push(result);
} else { } else {
stateMap.set(result.city, [result]); stateMap.set(result.city, [result]);
}
});
cleanedMapByState.forEach((stateMap, _state) => {
stateMap.forEach((originals, _cleaned) => {
if (originals.length > 1) {
originals.forEach((original) => {
if (!original.potentialIssues.includes('duplicate')) {
original.potentialIssues.push('duplicate');
}
});
} }
}); });
});
// Filter results if requested cleanedMapByState.forEach((stateMap, _state) => {
let finalResults = results; stateMap.forEach((originals, _cleaned) => {
if (onlyProblems) { if (originals.length > 1) {
finalResults = results.filter((r) => r.potentialIssues.length > 0); originals.forEach((original) => {
} if (!original.potentialIssues.includes('duplicate')) {
if (onlyDuplicates) { original.potentialIssues.push('duplicate');
finalResults = finalResults.filter((r) => r.potentialIssues.includes('duplicate')); }
} });
}
});
});
const outputResult = finalResults.map((result) => { // Filter results if requested
let outputItem = result; let finalResults = results;
if (options.onlyProblems) {
// Don't include lat or long in diff mode finalResults = results.filter((r) => r.potentialIssues.length > 0);
if (noCoordinates || diffMode) { }
const { if (options.onlyDuplicates) {
lat: _lat, lon: _lon, ...resultWithoutLocation finalResults = finalResults.filter((r) => r.potentialIssues.includes('duplicate'));
} = result;
outputItem = resultWithoutLocation;
} }
// Don't include potentialIssues when --no-problems is specified const outputResult = finalResults.map((result) => {
if (noProblems || diffMode) { let outputItem = result;
const { potentialIssues: _potentialIssues, ...resultWithoutIssues } = outputItem;
outputItem = resultWithoutIssues;
}
// Remove type and priority if --no-priority is specified // Don't include lat or long in diff mode
if (noPriority || diffMode) { if (options.noCoordinates || options.diffMode) {
const { type: _type, priority: _priority, ...resultWithoutPriority } = outputItem; const {
outputItem = resultWithoutPriority; lat: _lat, lon: _lon, ...resultWithoutLocation
} } = result;
outputItem = resultWithoutLocation;
}
// remove simple field if --no-simple is specified // Don't include potentialIssues when --no-problems is specified
if (noSimple || diffMode) { if (options.noProblems || options.diffMode) {
const { simple: _simple, ...resultWithoutSimple } = outputItem; const { potentialIssues: _potentialIssues, ...resultWithoutIssues } = outputItem;
outputItem = resultWithoutSimple; outputItem = resultWithoutIssues;
} }
return outputItem; // Remove type and priority if --no-priority is specified
}); if (options.noPriority || options.diffMode) {
const { type: _type, priority: _priority, ...resultWithoutPriority } = outputItem;
outputItem = resultWithoutPriority;
}
// remove simple field if --no-simple is specified
if (options.noSimple || options.diffMode) {
const { simple: _simple, ...resultWithoutSimple } = outputItem;
outputItem = resultWithoutSimple;
}
return outputItem;
});
if (writeFile) {
const fileResults = results.map(({ const fileResults = results.map(({
simple: _simple, type: _type, potentialIssues: _potentialIssues, ...rest simple: _simple, type: _type, potentialIssues: _potentialIssues, location: _location, ...rest
}) => rest); }) => rest);
writeFileSync('./datagenerators/output/stations.json', compactStringifyToObject(fileResults)); if (options.writeFile) {
console.log(`Wrote ${fileResults.length} processed stations to datagenerators/output/stations.json`); writeFileSync('./datagenerators/output/stations.json', compactStringifyToObject(fileResults));
} else { console.log(`Wrote ${fileResults.length} processed stations to datagenerators/output/stations.json`);
console.log(compactStringifyToArray(outputResult)); } else {
console.log(compactStringifyToArray(outputResult));
}
// array to output object
const returnObject = {};
fileResults.forEach((item) => {
returnObject[item.id] = item;
});
return returnObject;
};
// Work out whether this module is the script node was started with
// (true) or whether it was pulled in via import by another module (false).
const commandLine = (() => {
	// only a file: URL can be translated back into a filesystem path
	const loadedFromFile = import.meta.url.startsWith('file:');
	if (!loadedFromFile) {
		return false;
	}
	// process.argv[1] is the script path given to node; a match means direct execution
	const thisModulePath = url.fileURLToPath(import.meta.url);
	return process.argv[1] === thisModulePath;
})();
// Run the post processor immediately only when this file was launched directly
// from the command line; the options then come from the process.argv flags
// collected by readArguments(). When the file is imported instead, nothing runs
// here and the importer is expected to call postProcessor() itself.
if (commandLine) {
postProcessor(readArguments());
}
// default export so other data generators can invoke the post processor in-process
export default postProcessor;

View File

@@ -1,3 +1,4 @@
/* eslint-disable no-loop-func */
// list all stations in a single file // list all stations in a single file
// only find stations with 4 letter codes // only find stations with 4 letter codes
@@ -6,67 +7,91 @@ import https from './https.mjs';
import states from './stations-states.mjs'; import states from './stations-states.mjs';
import chunk from './chunk.mjs'; import chunk from './chunk.mjs';
import overrides from './stations-overrides.mjs'; import overrides from './stations-overrides.mjs';
import postProcessor from './stations-postprocessor.mjs';
// check for cached flag
const USE_CACHE = process.argv.includes('--use-cache');
// skip stations starting with these letters // skip stations starting with these letters
const skipStations = ['U', 'C', 'H', 'W', 'Y', 'T', 'S', 'M', 'O', 'L', 'A', 'F', 'B', 'N', 'V', 'R', 'D', 'E', 'I', 'G', 'J']; const skipStations = ['U', 'C', 'H', 'W', 'Y', 'T', 'S', 'M', 'O', 'L', 'A', 'F', 'B', 'N', 'V', 'R', 'D', 'E', 'I', 'G', 'J'];
// chunk the list of states // chunk the list of states
const chunkStates = chunk(states, 1); const chunkStates = chunk(states, 3);
// store output // store output
const output = {}; const output = {};
let completed = 0;
// process all chunks // get data from api if desired
for (let i = 0; i < chunkStates.length; i += 1) { if (!USE_CACHE) {
const stateChunk = chunkStates[i]; // process all chunks
// loop through states for (let i = 0; i < chunkStates.length; i += 1) {
const stateChunk = chunkStates[i];
// loop through states
// eslint-disable-next-line no-await-in-loop // eslint-disable-next-line no-await-in-loop
await Promise.allSettled(stateChunk.map(async (state) => { await Promise.allSettled(stateChunk.map(async (state) => {
try { try {
let stations; let stations;
let next = `https://api.weather.gov/stations?state=${state}`; let next = `https://api.weather.gov/stations?state=${state}`;
let round = 0; let round = 0;
do { do {
console.log(`Getting: ${state}-${round}`); console.log(`Getting: ${state}-${round}`);
// get list and parse the JSON // get list and parse the JSON
// eslint-disable-next-line no-await-in-loop // eslint-disable-next-line no-await-in-loop
const stationsRaw = await https(next); const stationsRaw = await https(next);
stations = JSON.parse(stationsRaw); stations = JSON.parse(stationsRaw);
// filter stations for 4 letter identifiers // filter stations for 4 letter identifiers
const stationsFiltered4 = stations.features.filter((station) => station.properties.stationIdentifier.match(/^[A-Z]{4}$/)); const stationsFiltered4 = stations.features.filter((station) => station.properties.stationIdentifier.match(/^[A-Z]{4}$/));
// filter against starting letter // filter against starting letter
const stationsFiltered = stationsFiltered4.filter((station) => !skipStations.includes(station.properties.stationIdentifier.slice(0, 1))); const stationsFiltered = stationsFiltered4.filter((station) => !skipStations.includes(station.properties.stationIdentifier.slice(0, 1)));
// add each resulting station to the output // add each resulting station to the output
stationsFiltered.forEach((station) => { stationsFiltered.forEach((station) => {
const id = station.properties.stationIdentifier; const id = station.properties.stationIdentifier;
if (output[id]) { if (output[id]) {
console.log(`Duplicate station: ${state}-${id}`); console.log(`Duplicate station: ${state}-${id}`);
return; return;
} }
// get any overrides if available output[id] = {
const override = overrides[id] ?? {}; id,
output[id] = { city: station.properties.name,
id, state,
city: station.properties.name, lat: station.geometry.coordinates[1],
state, lon: station.geometry.coordinates[0],
lat: station.geometry.coordinates[1], };
lon: station.geometry.coordinates[0], });
// finally add the overrides next = stations?.pagination?.next;
...override, round += 1;
}; // write the output
}); writeFileSync('./datagenerators/output/stations-raw.json', JSON.stringify(output, null, 2));
next = stations?.pagination?.next; }
round += 1; while (next && stations.features.length > 0);
// write the output completed += 1;
writeFileSync('./datagenerators/output/stations-raw.json', JSON.stringify(output, null, 2)); console.log(`Complete: ${state} ${completed}/${states.length}`);
return true;
} catch {
console.error(`Unable to get state: ${state}`);
return false;
} }
while (next && stations.features.length > 0); }));
console.log(`Complete: ${state}`); }
return true;
} catch {
console.error(`Unable to get state: ${state}`);
return false;
}
}));
} }
// Post-process the station list using the post processor's default options.
// No station data is passed in: data is passed through the file stations-raw.json.
// NOTE(review): the post processor module appears to load stations-raw.json at
// import time, and static imports are evaluated before the API loop above rewrites
// that file, so a run without --use-cache may post-process the previous run's
// data. Confirm, and if so move the read inside postProcessor().
const postProcessed = postProcessor();

// Layer the manual overrides on top of the processed stations.
// Overrides whose id is not present in the processed output are ignored.
Object.keys(overrides)
	.filter((id) => postProcessed[id])
	.forEach((id) => {
		postProcessed[id] = { ...postProcessed[id], ...overrides[id] };
	});

// write final file to disk
const finalJson = JSON.stringify(postProcessed, null, 2);
writeFileSync('./datagenerators/output/stations.json', finalJson);