Skip to content
This repository was archived by the owner on May 5, 2019. It is now read-only.

Commit 88b20ca

Browse files
committed
save work
1 parent de280ba commit 88b20ca

File tree

2 files changed

+68
-32
lines changed

2 files changed

+68
-32
lines changed

src/abstractdatatable/join.jl

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -41,47 +41,60 @@ Base.length(x::RowIndexMap) = length(x.orig)
4141

4242
# composes the joined data table using the maps between the left and right
4343
# table rows and the indices of rows in the result
44-
function compose_joined_table(joiner::DataTableJoiner, kind::Symbol,
44+
function compose_joined_table(joiner::DataTableJoiner,
4545
left_ixs::RowIndexMap, leftonly_ixs::RowIndexMap,
4646
right_ixs::RowIndexMap, rightonly_ixs::RowIndexMap)
4747
@assert length(left_ixs) == length(right_ixs)
4848
# compose left half of the result taking all left columns
4949
all_orig_left_ixs = vcat(left_ixs.orig, leftonly_ixs.orig)
50-
if length(leftonly_ixs) > 0
50+
51+
lil = length(left_ixs)
52+
loil = length(leftonly_ixs)
53+
ril = length(right_ixs)
54+
roil = length(rightonly_ixs)
55+
56+
if loil > 0
5157
# combine the matched (left_ixs.orig) and non-matched (leftonly_ixs.orig) indices of the left table rows
5258
# preserving the original rows order
53-
all_orig_left_ixs = similar(left_ixs.orig, length(left_ixs)+length(leftonly_ixs))
59+
all_orig_left_ixs = similar(left_ixs.orig, lil + loil)
5460
@inbounds all_orig_left_ixs[left_ixs.join] = left_ixs.orig
5561
@inbounds all_orig_left_ixs[leftonly_ixs.join] = leftonly_ixs.orig
5662
else
5763
# the result contains only the left rows that are matched to right rows (left_ixs)
5864
all_orig_left_ixs = left_ixs.orig # no need to copy left_ixs.orig as it's not used elsewhere
5965
end
60-
ril = length(right_ixs)
61-
loil = length(leftonly_ixs)
62-
roil = length(rightonly_ixs)
63-
left_dt = DataTable(Any[resize!(col[all_orig_left_ixs], length(all_orig_left_ixs)+roil)
64-
for col in columns(joiner.dtl)],
65-
names(joiner.dtl))
6666

67-
# compose right half of the result taking all right columns excluding on
68-
dtr_noon = without(joiner.dtr, joiner.on_cols)
6967
# permutation to swap rightonly and leftonly rows
7068
right_perm = vcat(1:ril, ril+roil+1:ril+roil+loil, ril+1:ril+roil)
7169
if length(leftonly_ixs) > 0
7270
# compose right_perm with the permutation that restores left rows order
7371
right_perm[vcat(right_ixs.join, leftonly_ixs.join)] = right_perm[1:ril+loil]
7472
end
7573
all_orig_right_ixs = vcat(right_ixs.orig, rightonly_ixs.orig)
76-
resizelen = length(all_orig_right_ixs)+length(leftonly_ixs)
77-
rightcols = Any[kind == :inner ?
78-
col[all_orig_right_ixs][right_perm] :
79-
copy!(similar_nullable(col, resizelen), col[all_orig_right_ixs])[right_perm]
80-
for col in columns(dtr_noon)]
81-
right_dt = DataTable(rightcols, names(dtr_noon))
82-
# merge left and right parts of the joined table
83-
res = hcat!(left_dt, right_dt)
8474

75+
# compose right half of the result taking all right columns excluding on
76+
dtr_noon = without(joiner.dtr, joiner.on_cols)
77+
78+
laoli = length(all_orig_left_ixs)
79+
laori = length(all_orig_right_ixs)
80+
@assert laoli + roil == laori + loil
81+
numrows = length(all_orig_left_ixs) + roil
82+
numcols = ncol(joiner.dtl) + ncol(dtr_noon)
83+
84+
# a side that contributes fewer rows than the joined result needs nullable columns
85+
leftnull = laoli < laoli + roil
86+
rightnull = laori < laori + loil
87+
dtcols = Vector{Any}(numcols)
88+
for (i,col) in enumerate(columns(joiner.dtl))
89+
dtcols[i] = leftnull ? copy!(similar_nullable(col, numrows), col[all_orig_left_ixs]) :
90+
col[all_orig_left_ixs]
91+
end
92+
for (i,col) in enumerate(columns(dtr_noon))
93+
dtcols[i+ncol(joiner.dtl)] = rightnull ? copy!(similar_nullable(col, numrows), col[all_orig_right_ixs])[right_perm] :
94+
col[all_orig_right_ixs][right_perm]
95+
end
96+
colnames = vcat(names(joiner.dtl), names(dtr_noon))
97+
res = DataTable(dtcols, Index(colnames))
8598
if length(rightonly_ixs.join) > 0
8699
# some left rows are nulls, so the values of the "on" columns
87100
# need to be taken from the right
@@ -207,6 +220,8 @@ join(dt1::AbstractDataTable,
207220
- `:cross` : a full Cartesian product of the key combinations; every
208221
row of `dt1` is matched with every row of `dt2`
209222
223+
For the three join operations that may introduce missing values, `:outer`, `:left`,
224+
and `:right`,
210225
null values are filled in where needed to complete joins.
211226
212227
### Result
@@ -243,22 +258,21 @@ function Base.join(dt1::AbstractDataTable,
243258
joiner = DataTableJoiner(dt1, dt2, on)
244259

245260
if kind == :inner
246-
compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
247-
group_rows(joiner.dtr_on),
248-
true, false, true, false)...)
261+
compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
262+
group_rows(joiner.dtr_on),
263+
true, false, true, false)...)
249264
elseif kind == :left
250-
compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
251-
group_rows(joiner.dtr_on),
252-
true, true, true, false)...)
265+
compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
266+
group_rows(joiner.dtr_on),
267+
true, true, true, false)...)
253268
elseif kind == :right
254-
right_ixs, rightonly_ixs, left_ixs, leftonly_ixs = update_row_maps!(joiner.dtr_on, joiner.dtl_on,
255-
group_rows(joiner.dtl_on),
256-
true, true, true, false)
257-
compose_joined_table(joiner, kind, left_ixs, leftonly_ixs, right_ixs, rightonly_ixs)
269+
compose_joined_table(joiner, update_row_maps!(joiner.dtr_on, joiner.dtl_on,
270+
group_rows(joiner.dtl_on),
271+
true, true, true, false)[[3, 4, 1, 2]]...)
258272
elseif kind == :outer
259-
compose_joined_table(joiner, kind, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
260-
group_rows(joiner.dtr_on),
261-
true, true, true, true)...)
273+
compose_joined_table(joiner, update_row_maps!(joiner.dtl_on, joiner.dtr_on,
274+
group_rows(joiner.dtr_on),
275+
true, true, true, true)...)
262276
elseif kind == :semi
263277
# hash the right rows
264278
dtr_on_grp = group_rows(joiner.dtr_on)

test/join.jl

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,4 +117,26 @@ module TestJoin
117117
DataTable([collect(1:10), collect(2:11), collect(3:12)], [:x, :y, :z])
118118
@test join(dtnull, dt, on = :x) ==
119119
DataTable([collect(1:10), collect(3:12), collect(2:11)], [:x, :z, :y])
120+
121+
@testset "missingness" begin
122+
small = DataTable(fruit = [:banana, :plantain, :melon],
123+
vegetable = [:artichoke, :leek, :pepper])
124+
large = DataTable(fruit = [:banana, :plantain, :melon, :raspberry],
125+
vegetable = [:artichoke, :collards, :leek, :pepper])
126+
127+
@test join(small, large, on=:fruit, kind=:left) == DataTable(fruit = [:banana, :plantain, :melon],
128+
vegetable = [:artichoke, :leek, :pepper],
129+
vegetable_1 = [:artichoke, :collards, :leek])
130+
@test join(small, large, on=:fruit, kind=:right) == DataTable(fruit = [:banana, :plantain, :melon],
131+
vegetable = [:artichoke, :leek, :pepper],
132+
vegetable_1 = [:artichoke, :collards, :leek])
133+
@test join(small, large, on=:fruit, kind=:outer)
134+
135+
@test join(small, large, on=:vegetable, kind=:left)
136+
@test join(small, large, on=:vegetable, kind=:right)
137+
@test join(small, large, on=:vegetable, kind=:outer)
138+
139+
@test join(small, large, on=[:fruit, :vegetable], kind=:outer)
140+
@test join(small, large, on=[:fruit, :vegetable], kind=:left)
141+
@test join(small, large, on=[:fruit, :vegetable], kind=:right)
120142
end

0 commit comments

Comments
 (0)