Skip to content

Commit 3fd053f

Browse files
committed
Initial commit
0 parents  commit 3fd053f

File tree

7 files changed

+422
-0
lines changed

7 files changed

+422
-0
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
test_data/DE.txt
2+
.idea

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 Nico Grashoff
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Postcode
2+
3+
## Overview
4+
The `postcode` package is designed for fetching and parsing postal code data, specifically from the GeoNames geographical database (https://www.geonames.org/).
5+
It offers a straightforward interface to download postal code data for various countries and parse them into a structured Go data type.
6+
7+
## Features
8+
- Download postal code data by country from the GeoNames database.
9+
- Utilizes HTTP ETag caching to minimize data transfer.
10+
- Parses the downloaded data into a structured format for easy use in Go applications.
11+
12+
## Installation
13+
To use the `postcode` package in your Go project, simply execute the following command:
14+
15+
```bash
16+
go get github.com/ngrash/postcode
17+
```
18+
19+
## Usage
20+
21+
### Fetching Postal Code Data
22+
To fetch postal code data for a specific country, use the `FetchCountry` function. This function also supports ETag caching to minimize unnecessary data transfers.
23+
24+
Example:
25+
```go
26+
package main
27+
28+
import "github.com/ngrash/postcode"
29+
30+
func main() {
31+
var previousEtag string
32+
entries, modified, newEtag, err := postcode.FetchCountry("US", previousEtag)
33+
if err != nil {
34+
// Handle error
35+
}
36+
if modified {
37+
// Process new entries
38+
// Save newEtag for future requests
39+
}
40+
}
41+
```
42+
43+
### Fields in Postal Code Entry
44+
Each postal code entry (`Entry` type) is an array of 12 strings, representing different data fields:
45+
46+
- `CountryCode`
47+
- `PostalCode`
48+
- `PlaceName`
49+
- `AdminName1`
50+
- `AdminCode1`
51+
- `AdminName2`
52+
- `AdminCode2`
53+
- `AdminName3`
54+
- `AdminCode3`
55+
- `Latitude`
56+
- `Longitude`
57+
- `Accuracy`
58+
59+
## Contributing
60+
Contributions to the `postcode` package are welcome. Please feel free to submit pull requests or open issues for bugs, feature requests, license problems or documentation improvements.
61+
62+
## License
63+
This project is licensed under the [MIT License](LICENSE).
64+
65+
The data downloaded from [GeoNames.org](http://geonames.org) is licensed under [Creative Commons Attribution 4.0 License](https://creativecommons.org/licenses/by/4.0/).
66+
This includes the ZIP file in the test_data directory as well as derived snippets used in the tests.
67+
Postal code databases for some countries may come with additional licenses. See [GeoNames' readme.txt](https://download.geonames.org/export/zip/readme.txt) for details.

go.mod

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
module github.com/ngrash/postcode
2+
3+
go 1.21

postcode.go

+193
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
// Package postcode provides functionality specifically for downloading and parsing
2+
// postal code data from the GeoNames geographical database (https://www.geonames.org/).
3+
package postcode
4+
5+
import (
6+
"archive/zip"
7+
"bytes"
8+
"encoding/csv"
9+
"errors"
10+
"fmt"
11+
"io"
12+
"net/http"
13+
"strings"
14+
)
15+
16+
// HTTPClient is the package-level http.Client used for every HTTP request made
// by this package. Replace or reconfigure it to change the HTTP behavior, for
// example to set a Timeout or a custom Transport.
//
// The zero value used by default applies no request timeout and sends requests
// through http.DefaultTransport.
var HTTPClient http.Client
19+
20+
// Entry represents a single postal code entry. It is an array of 12 strings,
// each holding one field of the record. Index it with the Field constants,
// for example e[PostalCode].
type Entry [12]string
22+
23+
// Field represents a specific field in a postal code entry. Its value is the
// index of that field within an Entry.
type Field int

const (
	// CountryCode is the index for the country code in a postal code entry.
	CountryCode Field = iota
	// PostalCode is the index for the postal code in a postal code entry.
	PostalCode
	// PlaceName is the index for the place name in a postal code entry.
	PlaceName
	// AdminName1 is the index for the first level of administrative division name in a postal code entry.
	AdminName1
	// AdminCode1 is the index for the first level of administrative division code in a postal code entry.
	AdminCode1
	// AdminName2 is the index for the second level of administrative division name in a postal code entry.
	AdminName2
	// AdminCode2 is the index for the second level of administrative division code in a postal code entry.
	AdminCode2
	// AdminName3 is the index for the third level of administrative division name in a postal code entry.
	AdminName3
	// AdminCode3 is the index for the third level of administrative division code in a postal code entry.
	AdminCode3
	// Latitude is the index for the latitude in a postal code entry.
	Latitude
	// Longitude is the index for the longitude in a postal code entry.
	Longitude
	// Accuracy is the index for the accuracy in a postal code entry.
	Accuracy
)
52+
53+
// FetchCountry fetches postal code entries for a specific country code from the GeoNames database.
54+
// It leverages the HTTP ETag mechanism to minimize data transfer for unchanged postal code data.
55+
//
56+
// The function takes two arguments:
57+
//
58+
// cc: The country code for which postal code data is to be fetched.
59+
// etag: An ETag value from a previous request to this function.
60+
//
61+
// If the data for the given country code has not changed since the last request with the provided ETag,
62+
// the function returns with 'modified' set to false, and no new data is fetched.
63+
//
64+
// If the data has changed, or if this is the first request (indicated by an empty etag),
65+
// the function fetches the updated data, sets 'modified' to true, and returns the new data along with the new ETag.
66+
//
67+
// Example usage:
68+
//
69+
// entries, modified, newEtag, err := FetchCountry("US", previousEtag)
70+
// if err != nil {
71+
// // Handle error
72+
// }
73+
// if modified {
74+
// // Process new entries
75+
// // Save newEtag for future requests
76+
// }
77+
//
78+
// See https://download.geonames.org/export/zip/ for a list of available countries.
79+
func FetchCountry(cc, etag string) (entries []Entry, modified bool, newEtag string, err error) {
80+
cc, err = normalizeCountryCode(cc)
81+
if err != nil {
82+
return
83+
}
84+
85+
url := downloadURL(cc)
86+
zipData, modified, newEtag, err := download(url, etag)
87+
if !modified || err != nil {
88+
return
89+
}
90+
91+
filename := zippedFile(cc)
92+
csvData, err := unzipFile(zipData, filename)
93+
if err != nil {
94+
return
95+
}
96+
97+
entries, err = parseCSV(csvData)
98+
99+
return
100+
}
101+
102+
// normalizeCountryCode validates cc and returns it in upper case.
//
// A valid country code consists of exactly two ASCII letters. Anything else is
// rejected so that no other bytes (path separators, dots, '#', '?', ...) end up
// in the download URL or the zipped file name built from the code.
//
// The upper-cased input is returned even when an error is reported; callers
// must not use it in that case.
func normalizeCountryCode(cc string) (string, error) {
	r := strings.ToUpper(cc)
	if got, want := len(cc), 2; got != want {
		return r, fmt.Errorf("country code %q has %d bytes, want %d", cc, got, want)
	}
	// Validate the raw input rather than r: upper-casing can change the byte
	// length of non-ASCII input (U+0131 is two bytes, its upper case "I" is one).
	for i := 0; i < len(cc); i++ {
		c := cc[i]
		isLetter := (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
		if !isLetter {
			return r, fmt.Errorf("country code %q must consist of ASCII letters", cc)
		}
	}
	return r, nil
}
109+
110+
// downloadURL returns the URL of the GeoNames postal code ZIP archive for the
// country code cc. cc is inserted verbatim, so it must already be normalized.
func downloadURL(cc string) string {
	return fmt.Sprintf("https://download.geonames.org/export/zip/%s.zip", cc)
}
113+
114+
func download(url, etag string) ([]byte, bool, string, error) {
115+
req, err := http.NewRequest(http.MethodGet, url, nil)
116+
if err != nil {
117+
return nil, false, "", err
118+
}
119+
req.Header.Add("If-None-Match", etag)
120+
resp, err := HTTPClient.Do(req)
121+
if err != nil {
122+
return nil, false, "", err
123+
}
124+
defer func(Body io.ReadCloser) {
125+
err = Body.Close()
126+
}(resp.Body)
127+
128+
if resp.StatusCode == http.StatusNotModified {
129+
// No new codes and no error.
130+
return nil, false, etag, nil
131+
}
132+
133+
if resp.StatusCode != http.StatusOK {
134+
return nil, false, "", fmt.Errorf("status = %s, want 200", resp.Status)
135+
}
136+
137+
body, err := io.ReadAll(resp.Body)
138+
if err != nil {
139+
return nil, false, "", fmt.Errorf("read response body: %w", err)
140+
}
141+
142+
return body, true, resp.Header.Get("Etag"), nil
143+
}
144+
145+
// zippedFile returns the name of the data file looked up inside a downloaded
// ZIP archive for the country code cc, e.g. "DE.txt" for "DE".
func zippedFile(cc string) string {
	return fmt.Sprintf("%s.txt", cc)
}
148+
149+
// unzipFile treats data as an in-memory ZIP archive and returns the
// uncompressed content of the first member whose name equals filename.
//
// It returns an error if data is not a readable ZIP archive, if the archive
// has no member with that exact name, or if the member cannot be opened, read
// or closed. No content is returned alongside an error from opening or reading.
func unzipFile(data []byte, filename string) (_ []byte, err error) {
	unzip, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return nil, fmt.Errorf("create unzipping reader: %w", err)
	}

	var file *zip.File
	for _, f := range unzip.File {
		if f.Name == filename {
			file = f
			break
		}
	}
	if file == nil {
		return nil, fmt.Errorf("zipfile missing %s", filename)
	}

	rc, err := file.Open()
	if err != nil {
		return nil, fmt.Errorf("open zipped %s: %w", filename, err)
	}
	// err is a named result, so a close failure is joined into what the caller sees.
	defer func() {
		err = errors.Join(err, rc.Close())
	}()

	content, err := io.ReadAll(rc)
	if err != nil {
		return nil, fmt.Errorf("read zipped %s: %w", filename, err)
	}
	return content, nil
}
175+
176+
func parseCSV(data []byte) ([]Entry, error) {
177+
r := bytes.NewReader(data)
178+
reader := csv.NewReader(r)
179+
reader.Comma = '\t'
180+
table, err := reader.ReadAll()
181+
if err != nil {
182+
return nil, err
183+
}
184+
es := make([]Entry, len(table))
185+
for i, columns := range table {
186+
var e Entry
187+
for ii, col := range columns {
188+
e[ii] = col
189+
}
190+
es[i] = e
191+
}
192+
return es, nil
193+
}

0 commit comments

Comments
 (0)