I've got a string with the following format:
City, State ZIP
I'd like to get City and State from this string.
How can I do that with JavaScript? edit: note that he doesn't mention he already has the zip code when he gets here, if that helps you in your solution ~~ drachenstern
var address = "San Francisco, CA 94129";
function parseAddress(address) {
// Make sure the address is a string.
if (typeof address !== "string") throw "Address is not a string.";
// Trim the address.
address = address.trim();
// Make an object to contain the data.
var returned = {};
// Find the comma.
var comma = address.indexOf(',');
// Pull out the city.
returned.city = address.slice(0, comma);
// Get everything after the city.
var after = address.substring(comma + 2); // The string after the comma, +2 so that we skip the comma and the space.
// Find the space.
var space = after.lastIndexOf(' ');
// Pull out the state.
returned.state = after.slice(0, space);
// Pull out the zip code.
returned.zip = after.substring(space + 1);
// Return the data.
return returned;
}
address = parseAddress(address);
This is probably better then using regular expressions and String.split(), as it takes into account that the state and city may have spaces.
EDIT: Bug fix: It only included the first word of multi-word state names.
And here's a minified version. :D
function parseAddress(a) {if(typeof a!=="string") throw "Address is not a string.";a=a.trim();var r={},c=a.indexOf(',');r.city=a.slice(0,c);var f=a.substring(c+2),s=f.lastIndexOf(' ');r.state=f.slice(0,s);r.zip=f.substring(s+1);return r;}
There are many ways to do this. Here's a very naive one:
var parts = "City, State ZIP".split(/\s+/); // split on whitespace
var city = parts[0].slice(0, parts[0].length - 1); // remove trailing comma
var state = parts[1];
var zip = parts[2];
Here's one that accounts for the presence of spaces in either the city or state or both:
var parts = "san fran bay, new mex state 666666".split(/\s+|,/),
partition = parts.indexOf(""),
city = parts.slice(0, partition).join(" "),
state = parts.slice(partition + 1, -1).join(" "),
zip = parts.pop();
This last one only works if you're lucky enough to be in an environment that supports destructuring assignment:
var city, statezip, state, zip, parts;
[city, statezip] = "Spaced City, New Mexico ZIP".split(/,\s*/);
parts = statezip.split(/\s+/);
zip = parts.pop();
state = parts.join(" ");
None of these perform any validation, of course.
Ok, since advising regex isn't good, here's my solution. It takes into account cities that have spaces in them, which the other responses here don't seem to do:
var str = "New York, NY 20101";
var cityAndRest = str.split(',');
var city = cityAndRest[0];
var stateAndZip = cityAndRest[1].trim().split(' ');
var state = stateAndZip[0];
var zip = stateAndZip[1];
First assumption: American addresses only.
First find out if the last 5 or the last 10 characters are numeric. A simpler test is to see if the last character is numeric. If so, it's probably got the zip code included. Then a simple test to see if the last 10 contains a space (city #####
) or if the last ten include a dash (12345-6789
) to figure out if it's a 5 or 5+4 zip. We'll test for a hyphen and no space. (city-du-lac 12345
captures -lac 12345
)
Next, all addresses split the city and state by a comma, so we want the last comma. Find the index of the last comma, and split there. I don't know of a city that uses commas in it's name, and I'm sure not gonna let my parser burst on an unknown if I can help it. I do ignore the fact that Washington DC
could also be Washington, DC
. I figure edge cases are for libraries, not one off scripts.
Lastly, trim everything that remains to remove trailing or leading spaces.
function IsNumeric(n) {
return !isNaN(parseFloat(n)) && isFinite(n);
}
var addr = 'New York City, New York 10101';
//var addr = 'San Bernadino, CA 11111';
function getCityStateZip(addr){
var city; var state;var zip;
city = ''; state = ''; zip = '';
var addrLen = addr.length;
if ( IsNumeric( addr.substring(addrLen - 1) ) ) {
//contains a zipcode - just a sanity check
//get last 10 characters for testing easily
var lastTen = addr.substring( addrLen - 10 );
if ( lastTen.indexOf('-') > 0 && ( lastTen.indexOf(' ') == -1 ) ) {
//found a hyphen and no space (matches our complex rule for zipcodes)
zip = lastTen;
} else {
zip = addr.substring( addrLen - 5 ); //assume a basic 5 zip code
}
}
var zipLen = zip.length;
addrLen = addrLen - zipLen - 1;
addr = addr.substring(0, addrLen ); //remove the chars we just moved into zip
var lastComma = addr.lastIndexOf(',');
if ( lastComma == -1 ) {
//you have a problem, how do you want to handle it?
}
city = addr.substring(0,lastComma); //skip the comma itself, yes?
state = addr.substring(lastComma + 2);
return { 'city':city,'state': state,'zip': zip};
}
getCityStateZip(addr)
IsNumeric
js function can be found here Validate decimal numbers in JavaScript - IsNumeric()
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With