From 20d7693d6be22a81e82ea6d72fdda12f3f636f87 Mon Sep 17 00:00:00 2001 From: Jake Burkhead Date: Wed, 18 Jan 2017 22:25:41 -0800 Subject: [PATCH] break outer loop when sampling --- detector/Fixtures/test2.csv | 151 ++++++++++++++++++++++++++++++++++++ detector/detect.go | 5 +- detector/detect_test.go | 12 +++ 3 files changed, 166 insertions(+), 2 deletions(-) create mode 100644 detector/Fixtures/test2.csv diff --git a/detector/Fixtures/test2.csv b/detector/Fixtures/test2.csv new file mode 100644 index 0000000..08f772c --- /dev/null +++ b/detector/Fixtures/test2.csv @@ -0,0 +1,151 @@ +"Sepal.Length","Sepal.Width","Petal.Length","Petal.Width","Species" +5.1,3.5,1.4,0.2,"setosa" +4.9,3,1.4,0.2,"setosa" +4.7,3.2,1.3,0.2,"setosa" +4.6,3.1,1.5,0.2,"setosa" +5,3.6,1.4,0.2,"setosa" +5.4,3.9,1.7,0.4,"setosa" +4.6,3.4,1.4,0.3,"setosa" +5,3.4,1.5,0.2,"setosa" +4.4,2.9,1.4,0.2,"setosa" +4.9,3.1,1.5,0.1,"setosa" +5.4,3.7,1.5,0.2,"setosa" +4.8,3.4,1.6,0.2,"setosa" +4.8,3,1.4,0.1,"setosa" +4.3,3,1.1,0.1,"setosa" +5.8,4,1.2,0.2,"setosa" +5.7,4.4,1.5,0.4,"setosa" +5.4,3.9,1.3,0.4,"setosa" +5.1,3.5,1.4,0.3,"setosa" +5.7,3.8,1.7,0.3,"setosa" +5.1,3.8,1.5,0.3,"setosa" +5.4,3.4,1.7,0.2,"setosa" +5.1,3.7,1.5,0.4,"setosa" +4.6,3.6,1,0.2,"setosa" +5.1,3.3,1.7,0.5,"setosa" +4.8,3.4,1.9,0.2,"setosa" +5,3,1.6,0.2,"setosa" +5,3.4,1.6,0.4,"setosa" +5.2,3.5,1.5,0.2,"setosa" +5.2,3.4,1.4,0.2,"setosa" +4.7,3.2,1.6,0.2,"setosa" +4.8,3.1,1.6,0.2,"setosa" +5.4,3.4,1.5,0.4,"setosa" +5.2,4.1,1.5,0.1,"setosa" +5.5,4.2,1.4,0.2,"setosa" +4.9,3.1,1.5,0.2,"setosa" +5,3.2,1.2,0.2,"setosa" +5.5,3.5,1.3,0.2,"setosa" +4.9,3.6,1.4,0.1,"setosa" +4.4,3,1.3,0.2,"setosa" +5.1,3.4,1.5,0.2,"setosa" +5,3.5,1.3,0.3,"setosa" +4.5,2.3,1.3,0.3,"setosa" +4.4,3.2,1.3,0.2,"setosa" +5,3.5,1.6,0.6,"setosa" +5.1,3.8,1.9,0.4,"setosa" +4.8,3,1.4,0.3,"setosa" +5.1,3.8,1.6,0.2,"setosa" +4.6,3.2,1.4,0.2,"setosa" +5.3,3.7,1.5,0.2,"setosa" +5,3.3,1.4,0.2,"setosa" +7,3.2,4.7,1.4,"versicolor" +6.4,3.2,4.5,1.5,"versicolor" +6.9,3.1,4.9,1.5,"versicolor" +5.5,2.3,4,1.3,"versicolor" +6.5,2.8,4.6,1.5,"versicolor" +5.7,2.8,4.5,1.3,"versicolor" +6.3,3.3,4.7,1.6,"versicolor" +4.9,2.4,3.3,1,"versicolor" +6.6,2.9,4.6,1.3,"versicolor" +5.2,2.7,3.9,1.4,"versicolor" +5,2,3.5,1,"versicolor" +5.9,3,4.2,1.5,"versicolor" +6,2.2,4,1,"versicolor" +6.1,2.9,4.7,1.4,"versicolor" +5.6,2.9,3.6,1.3,"versicolor" +6.7,3.1,4.4,1.4,"versicolor" +5.6,3,4.5,1.5,"versicolor" +5.8,2.7,4.1,1,"versicolor" +6.2,2.2,4.5,1.5,"versicolor" +5.6,2.5,3.9,1.1,"versicolor" +5.9,3.2,4.8,1.8,"versicolor" +6.1,2.8,4,1.3,"versicolor" +6.3,2.5,4.9,1.5,"versicolor" +6.1,2.8,4.7,1.2,"versicolor" +6.4,2.9,4.3,1.3,"versicolor" +6.6,3,4.4,1.4,"versicolor" +6.8,2.8,4.8,1.4,"versicolor" +6.7,3,5,1.7,"versicolor" +6,2.9,4.5,1.5,"versicolor" +5.7,2.6,3.5,1,"versicolor" +5.5,2.4,3.8,1.1,"versicolor" +5.5,2.4,3.7,1,"versicolor" +5.8,2.7,3.9,1.2,"versicolor" +6,2.7,5.1,1.6,"versicolor" +5.4,3,4.5,1.5,"versicolor" +6,3.4,4.5,1.6,"versicolor" +6.7,3.1,4.7,1.5,"versicolor" +6.3,2.3,4.4,1.3,"versicolor" +5.6,3,4.1,1.3,"versicolor" +5.5,2.5,4,1.3,"versicolor" +5.5,2.6,4.4,1.2,"versicolor" +6.1,3,4.6,1.4,"versicolor" +5.8,2.6,4,1.2,"versicolor" +5,2.3,3.3,1,"versicolor" +5.6,2.7,4.2,1.3,"versicolor" +5.7,3,4.2,1.2,"versicolor" +5.7,2.9,4.2,1.3,"versicolor" +6.2,2.9,4.3,1.3,"versicolor" +5.1,2.5,3,1.1,"versicolor" +5.7,2.8,4.1,1.3,"versicolor" +6.3,3.3,6,2.5,"virginica" +5.8,2.7,5.1,1.9,"virginica" +7.1,3,5.9,2.1,"virginica" +6.3,2.9,5.6,1.8,"virginica" +6.5,3,5.8,2.2,"virginica" +7.6,3,6.6,2.1,"virginica" +4.9,2.5,4.5,1.7,"virginica" +7.3,2.9,6.3,1.8,"virginica" +6.7,2.5,5.8,1.8,"virginica" +7.2,3.6,6.1,2.5,"virginica" +6.5,3.2,5.1,2,"virginica" +6.4,2.7,5.3,1.9,"virginica" +6.8,3,5.5,2.1,"virginica" +5.7,2.5,5,2,"virginica" +5.8,2.8,5.1,2.4,"virginica" +6.4,3.2,5.3,2.3,"virginica" +6.5,3,5.5,1.8,"virginica" +7.7,3.8,6.7,2.2,"virginica" +7.7,2.6,6.9,2.3,"virginica" +6,2.2,5,1.5,"virginica" +6.9,3.2,5.7,2.3,"virginica" +5.6,2.8,4.9,2,"virginica" +7.7,2.8,6.7,2,"virginica" +6.3,2.7,4.9,1.8,"virginica" +6.7,3.3,5.7,2.1,"virginica" +7.2,3.2,6,1.8,"virginica" +6.2,2.8,4.8,1.8,"virginica" +6.1,3,4.9,1.8,"virginica" +6.4,2.8,5.6,2.1,"virginica" +7.2,3,5.8,1.6,"virginica" +7.4,2.8,6.1,1.9,"virginica" +7.9,3.8,6.4,2,"virginica" +6.4,2.8,5.6,2.2,"virginica" +6.3,2.8,5.1,1.5,"virginica" +6.1,2.6,5.6,1.4,"virginica" +7.7,3,6.1,2.3,"virginica" +6.3,3.4,5.6,2.4,"virginica" +6.4,3.1,5.5,1.8,"virginica" +6,3,4.8,1.8,"virginica" +6.9,3.1,5.4,2.1,"virginica" +6.7,3.1,5.6,2.4,"virginica" +6.9,3.1,5.1,2.3,"virginica" +5.8,2.7,5.1,1.9,"virginica" +6.8,3.2,5.9,2.3,"virginica" +6.7,3.3,5.7,2.5,"virginica" +6.7,3,5.2,2.3,"virginica" +6.3,2.5,5,1.9,"virginica" +6.5,3,5.2,2,"virginica" +6.2,3.4,5.4,2.3,"virginica" +5.9,3,5.1,1.8,"virginica" diff --git a/detector/detect.go b/detector/detect.go index a3e51b6..0cf6e42 100644 --- a/detector/detect.go +++ b/detector/detect.go @@ -85,8 +85,9 @@ func (d *detector) sample(reader io.Reader, sampleLines int, enclosure byte) (fr } } else if (current == '\n' && prev != '\r' || current == '\r') && !enclosed { actualSampleLines++ - if actualSampleLines >= sampleLines { - break + if actualSampleLines > sampleLines { + actualSampleLines-- + return } } else if !enclosed { if !d.nonDelimiterRegex.MatchString(string(current)) { diff --git a/detector/detect_test.go b/detector/detect_test.go index 04ab8e0..87c23e2 100644 --- a/detector/detect_test.go +++ b/detector/detect_test.go @@ -123,3 +123,15 @@ func TestDetectAnalyze(t *testing.T) { assert.Equal(t, []byte{','}, candidates) } + +func TestDetectLargeFile(t *testing.T) { + detector := New() + + file, err := os.OpenFile("./Fixtures/test2.csv", os.O_RDONLY, os.ModePerm) + assert.NoError(t, err) + defer file.Close() + + delimiters := detector.DetectDelimiter(file, '"') + + assert.Equal(t, []string{","}, delimiters) +}