|
| 1 | +// Package postcode provides functionality specifically for downloading and parsing |
| 2 | +// postal code data from the GeoNames geographical database (https://www.geonames.org/). |
| 3 | +package postcode |
| 4 | + |
| 5 | +import ( |
| 6 | + "archive/zip" |
| 7 | + "bytes" |
| 8 | + "encoding/csv" |
| 9 | + "errors" |
| 10 | + "fmt" |
| 11 | + "io" |
| 12 | + "net/http" |
| 13 | + "strings" |
| 14 | +) |
| 15 | + |
// HTTPClient is the package-level http.Client through which this package
// issues its HTTP requests.
//
// The zero value used here is a working client, but it sets no request
// timeout. Assign a configured client (for example one with Timeout set) to
// change how requests are made.
var HTTPClient http.Client
| 19 | + |
// Entry is a single postal code record: a fixed-size array of 12 string
// fields. Use the Field constants (CountryCode through Accuracy) as indices,
// for example e[PostalCode].
type Entry [12]string
| 22 | + |
// Field identifies one field of a postal code Entry. Its values are the
// array indices of the corresponding Entry elements.
type Field int
| 25 | + |
// The Field constants below are numbered consecutively from 0 (CountryCode)
// to 11 (Accuracy), giving one index for each of the 12 elements of an Entry.
const (
	// CountryCode is the index of the country code in an Entry.
	CountryCode Field = iota
	// PostalCode is the index of the postal code in an Entry.
	PostalCode
	// PlaceName is the index of the place name in an Entry.
	PlaceName
	// AdminName1 is the index of the first-level administrative division name in an Entry.
	AdminName1
	// AdminCode1 is the index of the first-level administrative division code in an Entry.
	AdminCode1
	// AdminName2 is the index of the second-level administrative division name in an Entry.
	AdminName2
	// AdminCode2 is the index of the second-level administrative division code in an Entry.
	AdminCode2
	// AdminName3 is the index of the third-level administrative division name in an Entry.
	AdminName3
	// AdminCode3 is the index of the third-level administrative division code in an Entry.
	AdminCode3
	// Latitude is the index of the latitude in an Entry.
	Latitude
	// Longitude is the index of the longitude in an Entry.
	Longitude
	// Accuracy is the index of the accuracy value in an Entry.
	Accuracy
)
| 52 | + |
| 53 | +// FetchCountry fetches postal code entries for a specific country code from the GeoNames database. |
| 54 | +// It leverages the HTTP ETag mechanism to minimize data transfer for unchanged postal code data. |
| 55 | +// |
| 56 | +// The function takes two arguments: |
| 57 | +// |
| 58 | +// cc: The country code for which postal code data is to be fetched. |
| 59 | +// etag: An ETag value from a previous request to this function. |
| 60 | +// |
| 61 | +// If the data for the given country code has not changed since the last request with the provided ETag, |
| 62 | +// the function returns with 'modified' set to false, and no new data is fetched. |
| 63 | +// |
| 64 | +// If the data has changed, or if this is the first request (indicated by an empty etag), |
| 65 | +// the function fetches the updated data, sets 'modified' to true, and returns the new data along with the new ETag. |
| 66 | +// |
| 67 | +// Example usage: |
| 68 | +// |
| 69 | +// entries, modified, newEtag, err := FetchCountry("US", previousEtag) |
| 70 | +// if err != nil { |
| 71 | +// // Handle error |
| 72 | +// } |
| 73 | +// if modified { |
| 74 | +// // Process new entries |
| 75 | +// // Save newEtag for future requests |
| 76 | +// } |
| 77 | +// |
| 78 | +// See https://download.geonames.org/export/zip/ for a list of available countries. |
| 79 | +func FetchCountry(cc, etag string) (entries []Entry, modified bool, newEtag string, err error) { |
| 80 | + cc, err = normalizeCountryCode(cc) |
| 81 | + if err != nil { |
| 82 | + return |
| 83 | + } |
| 84 | + |
| 85 | + url := downloadURL(cc) |
| 86 | + zipData, modified, newEtag, err := download(url, etag) |
| 87 | + if !modified || err != nil { |
| 88 | + return |
| 89 | + } |
| 90 | + |
| 91 | + filename := zippedFile(cc) |
| 92 | + csvData, err := unzipFile(zipData, filename) |
| 93 | + if err != nil { |
| 94 | + return |
| 95 | + } |
| 96 | + |
| 97 | + entries, err = parseCSV(csvData) |
| 98 | + |
| 99 | + return |
| 100 | +} |
| 101 | + |
// normalizeCountryCode validates cc as a two-letter country code and returns
// it in upper case.
//
// cc must be exactly two bytes long and both bytes must be ASCII letters
// (either case). Checking the bytes, not just the length, matters because the
// result is interpolated into a download URL and a file name: a length-only
// check would also accept a single two-byte non-ASCII character or
// punctuation such as "..".
//
// On failure it returns "" and an error describing the rejected input.
func normalizeCountryCode(cc string) (string, error) {
	if got, want := len(cc), 2; got != want {
		return "", fmt.Errorf("country code %q has %d bytes, want %d", cc, got, want)
	}
	for i := 0; i < len(cc); i++ {
		c := cc[i]
		isLetter := ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
		if !isLetter {
			return "", fmt.Errorf("country code %q must consist of two ASCII letters", cc)
		}
	}
	return strings.ToUpper(cc), nil
}
| 109 | + |
// downloadURL returns the address of the GeoNames zip archive for country
// code cc: the export directory followed by "<cc>.zip". cc is inserted as
// given, without escaping or case changes.
func downloadURL(cc string) string {
	const exportDir = "https://download.geonames.org/export/zip"
	return fmt.Sprintf("%s/%s.zip", exportDir, cc)
}
| 113 | + |
| 114 | +func download(url, etag string) ([]byte, bool, string, error) { |
| 115 | + req, err := http.NewRequest(http.MethodGet, url, nil) |
| 116 | + if err != nil { |
| 117 | + return nil, false, "", err |
| 118 | + } |
| 119 | + req.Header.Add("If-None-Match", etag) |
| 120 | + resp, err := HTTPClient.Do(req) |
| 121 | + if err != nil { |
| 122 | + return nil, false, "", err |
| 123 | + } |
| 124 | + defer func(Body io.ReadCloser) { |
| 125 | + err = Body.Close() |
| 126 | + }(resp.Body) |
| 127 | + |
| 128 | + if resp.StatusCode == http.StatusNotModified { |
| 129 | + // No new codes and no error. |
| 130 | + return nil, false, etag, nil |
| 131 | + } |
| 132 | + |
| 133 | + if resp.StatusCode != http.StatusOK { |
| 134 | + return nil, false, "", fmt.Errorf("status = %s, want 200", resp.Status) |
| 135 | + } |
| 136 | + |
| 137 | + body, err := io.ReadAll(resp.Body) |
| 138 | + if err != nil { |
| 139 | + return nil, false, "", fmt.Errorf("read response body: %w", err) |
| 140 | + } |
| 141 | + |
| 142 | + return body, true, resp.Header.Get("Etag"), nil |
| 143 | +} |
| 144 | + |
// zippedFile returns the name of the data file expected inside the archive
// for country code cc, which is cc followed by ".txt".
func zippedFile(cc string) string {
	const extension = ".txt"
	// Sprint inserts no separator when its operands are strings, so this is
	// plain concatenation.
	return fmt.Sprint(cc, extension)
}
| 148 | + |
// unzipFile treats data as an in-memory zip archive and returns the
// uncompressed contents of the member whose name equals filename exactly.
//
// It returns an error when data is not a readable zip archive, when no member
// has that name, or when the member cannot be opened or read. An error from
// closing the member is joined onto the returned error.
func unzipFile(data []byte, filename string) (_ []byte, err error) {
	archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return nil, fmt.Errorf("create unzipping reader: %w", err)
	}

	// Locate the first member with a matching name.
	found := -1
	for i := range archive.File {
		if archive.File[i].Name == filename {
			found = i
			break
		}
	}
	if found < 0 {
		return nil, fmt.Errorf("zipfile missing %s", filename)
	}

	member, err := archive.File[found].Open()
	if err != nil {
		return nil, fmt.Errorf("open zipped %s: %w", filename, err)
	}
	// err is the named result, so a Close failure is visible to the caller.
	defer func() {
		err = errors.Join(err, member.Close())
	}()

	return io.ReadAll(member)
}
| 175 | + |
| 176 | +func parseCSV(data []byte) ([]Entry, error) { |
| 177 | + r := bytes.NewReader(data) |
| 178 | + reader := csv.NewReader(r) |
| 179 | + reader.Comma = '\t' |
| 180 | + table, err := reader.ReadAll() |
| 181 | + if err != nil { |
| 182 | + return nil, err |
| 183 | + } |
| 184 | + es := make([]Entry, len(table)) |
| 185 | + for i, columns := range table { |
| 186 | + var e Entry |
| 187 | + for ii, col := range columns { |
| 188 | + e[ii] = col |
| 189 | + } |
| 190 | + es[i] = e |
| 191 | + } |
| 192 | + return es, nil |
| 193 | +} |
0 commit comments