mirror of
https://github.com/netbymatt/ws4kp.git
synced 2026-04-14 15:49:31 -07:00
move station post processor inline with api gets
This commit is contained in:
12
.vscode/launch.json
vendored
12
.vscode/launch.json
vendored
@@ -26,6 +26,18 @@
|
|||||||
"skipFiles": [
|
"skipFiles": [
|
||||||
"<node_internals>/**"
|
"<node_internals>/**"
|
||||||
],
|
],
|
||||||
|
"args": [
|
||||||
|
"--use-cache"
|
||||||
|
],
|
||||||
|
"type": "node"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Data:stations-api",
|
||||||
|
"program": "${workspaceFolder}/datagenerators/stations.mjs",
|
||||||
|
"request": "launch",
|
||||||
|
"skipFiles": [
|
||||||
|
"<node_internals>/**"
|
||||||
|
],
|
||||||
"type": "node"
|
"type": "node"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
import { readFileSync, writeFileSync } from 'fs';
|
import { readFileSync, writeFileSync } from 'fs';
|
||||||
|
|
||||||
|
import * as url from 'node:url';
|
||||||
|
|
||||||
// Load station data
|
// Load station data
|
||||||
const stationInfo = JSON.parse(readFileSync('./datagenerators/output/stations-raw.json', 'utf8'));
|
const stationInfo = JSON.parse(readFileSync('./datagenerators/output/stations-raw.json', 'utf8'));
|
||||||
// const regionalCities = JSON.parse(readFileSync('./datagenerators/output/regionalcities.json', 'utf8'));
|
// const regionalCities = JSON.parse(readFileSync('./datagenerators/output/regionalcities.json', 'utf8'));
|
||||||
@@ -1109,139 +1111,184 @@ or where the fallback to the ICAO airport code occurred:
|
|||||||
jq -c '.[] | select(.name | test("^[A-Z]{3}$")) | {state, city, simple, name}'
|
jq -c '.[] | select(.name | test("^[A-Z]{3}$")) | {state, city, simple, name}'
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const diffMode = process.argv.includes('--diff');
|
const readArguments = () => ({
|
||||||
const onlyProblems = process.argv.includes('--only-problems');
|
diffMode: process.argv.includes('--diff'),
|
||||||
const noProblems = process.argv.includes('--no-problems');
|
onlyProblems: process.argv.includes('--only-problems'),
|
||||||
const onlyDuplicates = process.argv.includes('--only-dupes');
|
noProblems: process.argv.includes('--no-problems'),
|
||||||
const noPriority = process.argv.includes('--no-priority');
|
onlyDuplicates: process.argv.includes('--only-dupes'),
|
||||||
const noSimple = process.argv.includes('--no-simple');
|
noPriority: process.argv.includes('--no-priority'),
|
||||||
const noCoordinates = process.argv.includes('--no-coords');
|
noSimple: process.argv.includes('--no-simple'),
|
||||||
const writeFile = process.argv.includes('--write');
|
noCoordinates: process.argv.includes('--no-coords'),
|
||||||
|
writeFile: process.argv.includes('--write'),
|
||||||
|
});
|
||||||
|
|
||||||
// Process ALL stations at once to get the display name map
|
const DEFAULT_OPTIONS = {
|
||||||
let displayNameMap = processAllStations(stationInfo);
|
diffMode: false,
|
||||||
|
onlyProblems: false,
|
||||||
|
noProblems: false,
|
||||||
|
onlyDuplicates: false,
|
||||||
|
noPriority: false,
|
||||||
|
noSimple: false,
|
||||||
|
noCoordinates: false,
|
||||||
|
writeFile: false,
|
||||||
|
};
|
||||||
|
|
||||||
// Apply priority-based deduplication
|
const postProcessor = (_options) => {
|
||||||
displayNameMap = resolveDuplicatesByPriority(displayNameMap, stationInfo);
|
// combine default and provided options
|
||||||
|
const options = { ...DEFAULT_OPTIONS, ..._options };
|
||||||
|
|
||||||
const results = [];
|
// Process ALL stations at once to get the display name map
|
||||||
|
let displayNameMap = processAllStations(stationInfo);
|
||||||
|
|
||||||
// Now iterate through stations and use the pre-computed display names
|
// Apply priority-based deduplication
|
||||||
const stations = Object.values(stationInfo);
|
displayNameMap = resolveDuplicatesByPriority(displayNameMap, stationInfo);
|
||||||
stations.forEach((station) => {
|
|
||||||
const originalName = station.city;
|
|
||||||
const processedName = processingUtils.finalCleanup(displayNameMap[station.id]); // Look up by station ID
|
|
||||||
|
|
||||||
// Get airport type and priority for this station
|
const results = [];
|
||||||
const airportType = getAirportType(originalName, station.id); // Pass station ID for enhanced detection
|
|
||||||
const priority = getAirportPriority(airportType);
|
|
||||||
|
|
||||||
const potentialIssues = [];
|
// Now iterate through stations and use the pre-computed display names
|
||||||
// Check if the processed name contains punctuation (a period at the end is OK)
|
const stations = Object.values(stationInfo);
|
||||||
if (/[,;!?/:.]/.test(processedName) && !processedName.endsWith('.')) {
|
stations.forEach((station) => {
|
||||||
potentialIssues.push('punctuation');
|
const originalName = station.city;
|
||||||
}
|
const processedName = processingUtils.finalCleanup(displayNameMap[station.id]); // Look up by station ID
|
||||||
if (processedName.length > 12) {
|
|
||||||
potentialIssues.push('long');
|
|
||||||
}
|
|
||||||
if (processedName.length > 20) {
|
|
||||||
potentialIssues.push('reallyLong');
|
|
||||||
}
|
|
||||||
// check if it contains any digits
|
|
||||||
if (/\d/.test(processedName)) {
|
|
||||||
potentialIssues.push('digits');
|
|
||||||
}
|
|
||||||
|
|
||||||
results.push({
|
// Get airport type and priority for this station
|
||||||
id: station.id,
|
const airportType = getAirportType(originalName, station.id); // Pass station ID for enhanced detection
|
||||||
lat: station.lat,
|
const priority = getAirportPriority(airportType);
|
||||||
lon: station.lon,
|
|
||||||
state: station.state,
|
const potentialIssues = [];
|
||||||
location: originalName, // original full location name
|
// Check if the processed name contains punctuation (a period at the end is OK)
|
||||||
city: processedName, // processed city name for display
|
if (/[,;!?/:.]/.test(processedName) && !processedName.endsWith('.')) {
|
||||||
simple: originalName.match(/[^,/;\\-]*/)[0].substr(0, 12).trim(),
|
potentialIssues.push('punctuation');
|
||||||
type: airportType,
|
}
|
||||||
priority,
|
if (processedName.length > 12) {
|
||||||
potentialIssues,
|
potentialIssues.push('long');
|
||||||
|
}
|
||||||
|
if (processedName.length > 20) {
|
||||||
|
potentialIssues.push('reallyLong');
|
||||||
|
}
|
||||||
|
// check if it contains any digits
|
||||||
|
if (/\d/.test(processedName)) {
|
||||||
|
potentialIssues.push('digits');
|
||||||
|
}
|
||||||
|
|
||||||
|
results.push({
|
||||||
|
id: station.id,
|
||||||
|
lat: station.lat,
|
||||||
|
lon: station.lon,
|
||||||
|
state: station.state,
|
||||||
|
location: originalName, // original full location name
|
||||||
|
city: processedName, // processed city name for display
|
||||||
|
simple: originalName.match(/[^,/;\\-]*/)[0].substr(0, 12).trim(),
|
||||||
|
type: airportType,
|
||||||
|
priority,
|
||||||
|
potentialIssues,
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
|
||||||
|
|
||||||
// Check for duplicates by state
|
// Check for duplicates by state
|
||||||
const cleanedMapByState = new Map();
|
const cleanedMapByState = new Map();
|
||||||
|
|
||||||
results.forEach((result) => {
|
results.forEach((result) => {
|
||||||
const { state } = result;
|
const { state } = result;
|
||||||
if (!cleanedMapByState.has(state)) {
|
if (!cleanedMapByState.has(state)) {
|
||||||
cleanedMapByState.set(state, new Map());
|
cleanedMapByState.set(state, new Map());
|
||||||
}
|
}
|
||||||
const stateMap = cleanedMapByState.get(state);
|
const stateMap = cleanedMapByState.get(state);
|
||||||
if (stateMap.has(result.city)) {
|
if (stateMap.has(result.city)) {
|
||||||
stateMap.get(result.city).push(result);
|
stateMap.get(result.city).push(result);
|
||||||
} else {
|
} else {
|
||||||
stateMap.set(result.city, [result]);
|
stateMap.set(result.city, [result]);
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
cleanedMapByState.forEach((stateMap, _state) => {
|
|
||||||
stateMap.forEach((originals, _cleaned) => {
|
|
||||||
if (originals.length > 1) {
|
|
||||||
originals.forEach((original) => {
|
|
||||||
if (!original.potentialIssues.includes('duplicate')) {
|
|
||||||
original.potentialIssues.push('duplicate');
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
|
||||||
|
|
||||||
// Filter results if requested
|
cleanedMapByState.forEach((stateMap, _state) => {
|
||||||
let finalResults = results;
|
stateMap.forEach((originals, _cleaned) => {
|
||||||
if (onlyProblems) {
|
if (originals.length > 1) {
|
||||||
finalResults = results.filter((r) => r.potentialIssues.length > 0);
|
originals.forEach((original) => {
|
||||||
}
|
if (!original.potentialIssues.includes('duplicate')) {
|
||||||
if (onlyDuplicates) {
|
original.potentialIssues.push('duplicate');
|
||||||
finalResults = finalResults.filter((r) => r.potentialIssues.includes('duplicate'));
|
}
|
||||||
}
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
const outputResult = finalResults.map((result) => {
|
// Filter results if requested
|
||||||
let outputItem = result;
|
let finalResults = results;
|
||||||
|
if (options.onlyProblems) {
|
||||||
// Don't include lat or long in diff mode
|
finalResults = results.filter((r) => r.potentialIssues.length > 0);
|
||||||
if (noCoordinates || diffMode) {
|
}
|
||||||
const {
|
if (options.onlyDuplicates) {
|
||||||
lat: _lat, lon: _lon, ...resultWithoutLocation
|
finalResults = finalResults.filter((r) => r.potentialIssues.includes('duplicate'));
|
||||||
} = result;
|
|
||||||
outputItem = resultWithoutLocation;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't include potentialIssues when --no-problems is specified
|
const outputResult = finalResults.map((result) => {
|
||||||
if (noProblems || diffMode) {
|
let outputItem = result;
|
||||||
const { potentialIssues: _potentialIssues, ...resultWithoutIssues } = outputItem;
|
|
||||||
outputItem = resultWithoutIssues;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove type and priority if --no-priority is specified
|
// Don't include lat or long in diff mode
|
||||||
if (noPriority || diffMode) {
|
if (options.noCoordinates || options.diffMode) {
|
||||||
const { type: _type, priority: _priority, ...resultWithoutPriority } = outputItem;
|
const {
|
||||||
outputItem = resultWithoutPriority;
|
lat: _lat, lon: _lon, ...resultWithoutLocation
|
||||||
}
|
} = result;
|
||||||
|
outputItem = resultWithoutLocation;
|
||||||
|
}
|
||||||
|
|
||||||
// remove simple field if --no-simple is specified
|
// Don't include potentialIssues when --no-problems is specified
|
||||||
if (noSimple || diffMode) {
|
if (options.noProblems || options.diffMode) {
|
||||||
const { simple: _simple, ...resultWithoutSimple } = outputItem;
|
const { potentialIssues: _potentialIssues, ...resultWithoutIssues } = outputItem;
|
||||||
outputItem = resultWithoutSimple;
|
outputItem = resultWithoutIssues;
|
||||||
}
|
}
|
||||||
|
|
||||||
return outputItem;
|
// Remove type and priority if --no-priority is specified
|
||||||
});
|
if (options.noPriority || options.diffMode) {
|
||||||
|
const { type: _type, priority: _priority, ...resultWithoutPriority } = outputItem;
|
||||||
|
outputItem = resultWithoutPriority;
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove simple field if --no-simple is specified
|
||||||
|
if (options.noSimple || options.diffMode) {
|
||||||
|
const { simple: _simple, ...resultWithoutSimple } = outputItem;
|
||||||
|
outputItem = resultWithoutSimple;
|
||||||
|
}
|
||||||
|
|
||||||
|
return outputItem;
|
||||||
|
});
|
||||||
|
|
||||||
if (writeFile) {
|
|
||||||
const fileResults = results.map(({
|
const fileResults = results.map(({
|
||||||
simple: _simple, type: _type, potentialIssues: _potentialIssues, ...rest
|
simple: _simple, type: _type, potentialIssues: _potentialIssues, location: _location, ...rest
|
||||||
}) => rest);
|
}) => rest);
|
||||||
|
|
||||||
writeFileSync('./datagenerators/output/stations.json', compactStringifyToObject(fileResults));
|
if (options.writeFile) {
|
||||||
console.log(`Wrote ${fileResults.length} processed stations to datagenerators/output/stations.json`);
|
writeFileSync('./datagenerators/output/stations.json', compactStringifyToObject(fileResults));
|
||||||
} else {
|
console.log(`Wrote ${fileResults.length} processed stations to datagenerators/output/stations.json`);
|
||||||
console.log(compactStringifyToArray(outputResult));
|
} else {
|
||||||
|
console.log(compactStringifyToArray(outputResult));
|
||||||
|
}
|
||||||
|
|
||||||
|
// array to output object
|
||||||
|
const returnObject = {};
|
||||||
|
fileResults.forEach((item) => {
|
||||||
|
returnObject[item.id] = item;
|
||||||
|
});
|
||||||
|
|
||||||
|
return returnObject;
|
||||||
|
};
|
||||||
|
|
||||||
|
// determine if running from command line or module
|
||||||
|
const commandLine = (() => {
|
||||||
|
if (import.meta.url.startsWith('file:')) { // (A)
|
||||||
|
const modulePath = url.fileURLToPath(import.meta.url);
|
||||||
|
if (process.argv[1] === modulePath) { // (B)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
)();
|
||||||
|
|
||||||
|
// run post processor if called from command line
|
||||||
|
if (commandLine) {
|
||||||
|
postProcessor(readArguments());
|
||||||
|
}
|
||||||
|
|
||||||
|
export default postProcessor;
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
/* eslint-disable no-loop-func */
|
||||||
// list all stations in a single file
|
// list all stations in a single file
|
||||||
// only find stations with 4 letter codes
|
// only find stations with 4 letter codes
|
||||||
|
|
||||||
@@ -6,67 +7,91 @@ import https from './https.mjs';
|
|||||||
import states from './stations-states.mjs';
|
import states from './stations-states.mjs';
|
||||||
import chunk from './chunk.mjs';
|
import chunk from './chunk.mjs';
|
||||||
import overrides from './stations-overrides.mjs';
|
import overrides from './stations-overrides.mjs';
|
||||||
|
import postProcessor from './stations-postprocessor.mjs';
|
||||||
|
|
||||||
|
// check for cached flag
|
||||||
|
const USE_CACHE = process.argv.includes('--use-cache');
|
||||||
|
|
||||||
// skip stations starting with these letters
|
// skip stations starting with these letters
|
||||||
const skipStations = ['U', 'C', 'H', 'W', 'Y', 'T', 'S', 'M', 'O', 'L', 'A', 'F', 'B', 'N', 'V', 'R', 'D', 'E', 'I', 'G', 'J'];
|
const skipStations = ['U', 'C', 'H', 'W', 'Y', 'T', 'S', 'M', 'O', 'L', 'A', 'F', 'B', 'N', 'V', 'R', 'D', 'E', 'I', 'G', 'J'];
|
||||||
|
|
||||||
// chunk the list of states
|
// chunk the list of states
|
||||||
const chunkStates = chunk(states, 1);
|
const chunkStates = chunk(states, 3);
|
||||||
|
|
||||||
// store output
|
// store output
|
||||||
const output = {};
|
const output = {};
|
||||||
|
let completed = 0;
|
||||||
|
|
||||||
// process all chunks
|
// get data from api if desired
|
||||||
for (let i = 0; i < chunkStates.length; i += 1) {
|
if (!USE_CACHE) {
|
||||||
const stateChunk = chunkStates[i];
|
// process all chunks
|
||||||
// loop through states
|
for (let i = 0; i < chunkStates.length; i += 1) {
|
||||||
|
const stateChunk = chunkStates[i];
|
||||||
|
// loop through states
|
||||||
|
|
||||||
// eslint-disable-next-line no-await-in-loop
|
// eslint-disable-next-line no-await-in-loop
|
||||||
await Promise.allSettled(stateChunk.map(async (state) => {
|
await Promise.allSettled(stateChunk.map(async (state) => {
|
||||||
try {
|
try {
|
||||||
let stations;
|
let stations;
|
||||||
let next = `https://api.weather.gov/stations?state=${state}`;
|
let next = `https://api.weather.gov/stations?state=${state}`;
|
||||||
let round = 0;
|
let round = 0;
|
||||||
do {
|
do {
|
||||||
console.log(`Getting: ${state}-${round}`);
|
console.log(`Getting: ${state}-${round}`);
|
||||||
// get list and parse the JSON
|
// get list and parse the JSON
|
||||||
// eslint-disable-next-line no-await-in-loop
|
// eslint-disable-next-line no-await-in-loop
|
||||||
const stationsRaw = await https(next);
|
const stationsRaw = await https(next);
|
||||||
stations = JSON.parse(stationsRaw);
|
stations = JSON.parse(stationsRaw);
|
||||||
// filter stations for 4 letter identifiers
|
// filter stations for 4 letter identifiers
|
||||||
const stationsFiltered4 = stations.features.filter((station) => station.properties.stationIdentifier.match(/^[A-Z]{4}$/));
|
const stationsFiltered4 = stations.features.filter((station) => station.properties.stationIdentifier.match(/^[A-Z]{4}$/));
|
||||||
// filter against starting letter
|
// filter against starting letter
|
||||||
const stationsFiltered = stationsFiltered4.filter((station) => !skipStations.includes(station.properties.stationIdentifier.slice(0, 1)));
|
const stationsFiltered = stationsFiltered4.filter((station) => !skipStations.includes(station.properties.stationIdentifier.slice(0, 1)));
|
||||||
// add each resulting station to the output
|
// add each resulting station to the output
|
||||||
stationsFiltered.forEach((station) => {
|
stationsFiltered.forEach((station) => {
|
||||||
const id = station.properties.stationIdentifier;
|
const id = station.properties.stationIdentifier;
|
||||||
if (output[id]) {
|
if (output[id]) {
|
||||||
console.log(`Duplicate station: ${state}-${id}`);
|
console.log(`Duplicate station: ${state}-${id}`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// get any overrides if available
|
output[id] = {
|
||||||
const override = overrides[id] ?? {};
|
id,
|
||||||
output[id] = {
|
city: station.properties.name,
|
||||||
id,
|
state,
|
||||||
city: station.properties.name,
|
lat: station.geometry.coordinates[1],
|
||||||
state,
|
lon: station.geometry.coordinates[0],
|
||||||
lat: station.geometry.coordinates[1],
|
};
|
||||||
lon: station.geometry.coordinates[0],
|
});
|
||||||
// finally add the overrides
|
next = stations?.pagination?.next;
|
||||||
...override,
|
round += 1;
|
||||||
};
|
// write the output
|
||||||
});
|
writeFileSync('./datagenerators/output/stations-raw.json', JSON.stringify(output, null, 2));
|
||||||
next = stations?.pagination?.next;
|
}
|
||||||
round += 1;
|
while (next && stations.features.length > 0);
|
||||||
// write the output
|
completed += 1;
|
||||||
writeFileSync('./datagenerators/output/stations-raw.json', JSON.stringify(output, null, 2));
|
console.log(`Complete: ${state} ${completed}/${states.length}`);
|
||||||
|
return true;
|
||||||
|
} catch {
|
||||||
|
console.error(`Unable to get state: ${state}`);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
while (next && stations.features.length > 0);
|
}));
|
||||||
console.log(`Complete: ${state}`);
|
}
|
||||||
return true;
|
|
||||||
} catch {
|
|
||||||
console.error(`Unable to get state: ${state}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// run the post processor
|
||||||
|
// data is passed through the file stations-raw.json
|
||||||
|
const postProcessed = postProcessor();
|
||||||
|
|
||||||
|
// apply any overrides
|
||||||
|
Object.entries(overrides).forEach(([id, values]) => {
|
||||||
|
// check for existing value
|
||||||
|
if (postProcessed[id]) {
|
||||||
|
// apply the overrides
|
||||||
|
postProcessed[id] = {
|
||||||
|
...postProcessed[id],
|
||||||
|
...values,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// write final file to disk
|
||||||
|
writeFileSync('./datagenerators/output/stations.json', JSON.stringify(postProcessed, null, 2));
|
||||||
|
|||||||
Reference in New Issue
Block a user