1
- from .utilities import create_file_object , df_generator , logger , cast_pandas
2
- from ._base_copy import BaseCopy
3
1
import pandas as pd
2
+ from .utilities import create_file_object , df_generator , cast_pandas
3
+ from ._base_copy import BaseCopy
4
4
5
5
6
6
class HDFTableCopy (BaseCopy ):
@@ -90,33 +90,35 @@ def hdf_to_pg(self, data_formatters=[cast_pandas], data_formatter_kwargs={}):
90
90
data_formatter_kwargs: list of kwargs to pass to data_formatters functions
91
91
"""
92
92
if self .hdf_tables is None :
93
- logger .warn ("No HDF table found for SQL table {}" .format (self .sql_table ))
93
+ self .logger .warn (
94
+ "No HDF table found for SQL table {}" .format (self .sql_table )
95
+ )
94
96
return
95
97
96
98
for hdf_table in self .hdf_tables :
97
- logger .info ("*** {} ***" .format (hdf_table ))
99
+ self . logger .info ("*** {} ***" .format (hdf_table ))
98
100
99
- logger .info ("Reading HDF table" )
101
+ self . logger .info ("Reading HDF table" )
100
102
df = pd .read_hdf (self .file_name , key = hdf_table )
101
103
self .rows += len (df )
102
104
103
105
data_formatter_kwargs ["hdf_table" ] = hdf_table
104
- logger .info ("Formatting data" )
106
+ self . logger .info ("Formatting data" )
105
107
df = self .data_formatting (
106
108
df , functions = data_formatters , ** data_formatter_kwargs
107
109
)
108
110
109
- logger .info ("Creating generator for chunking dataframe" )
110
- for chunk in df_generator (df , self .csv_chunksize ):
111
+ self . logger .info ("Creating generator for chunking dataframe" )
112
+ for chunk in df_generator (df , self .csv_chunksize , logger = self . logger ):
111
113
112
- logger .info ("Creating CSV in memory" )
114
+ self . logger .info ("Creating CSV in memory" )
113
115
fo = create_file_object (chunk )
114
116
115
- logger .info ("Copying chunk to database" )
117
+ self . logger .info ("Copying chunk to database" )
116
118
self .copy_from_file (fo )
117
119
del fo
118
120
del df
119
- logger .info ("All chunks copied ({} rows)" .format (self .rows ))
121
+ self . logger .info ("All chunks copied ({} rows)" .format (self .rows ))
120
122
121
123
122
124
class SmallHDFTableCopy (HDFTableCopy ):
@@ -136,29 +138,29 @@ def hdf_to_pg(self, data_formatters=[cast_pandas], data_formatter_kwargs={}):
136
138
data_formatter_kwargs: list of kwargs to pass to data_formatters functions
137
139
"""
138
140
if self .hdf_tables is None :
139
- logger .warn ("No HDF table found for SQL table {self.sql_table}" )
141
+ self . logger .warn ("No HDF table found for SQL table {self.sql_table}" )
140
142
return
141
143
142
144
for hdf_table in self .hdf_tables :
143
- logger .info ("*** {} ***" .format (hdf_table ))
144
- logger .info ("Reading HDF table" )
145
+ self . logger .info ("*** {} ***" .format (hdf_table ))
146
+ self . logger .info ("Reading HDF table" )
145
147
df = pd .read_hdf (self .file_name , key = hdf_table )
146
148
self .rows += len (df )
147
149
148
150
data_formatter_kwargs ["hdf_table" ] = hdf_table
149
- logger .info ("Formatting data" )
151
+ self . logger .info ("Formatting data" )
150
152
df = self .data_formatting (
151
153
df , functions = data_formatters , ** data_formatter_kwargs
152
154
)
153
155
154
- logger .info ("Creating CSV in memory" )
156
+ self . logger .info ("Creating CSV in memory" )
155
157
fo = create_file_object (df )
156
158
157
- logger .info ("Copying table to database" )
159
+ self . logger .info ("Copying table to database" )
158
160
self .copy_from_file (fo )
159
161
del df
160
162
del fo
161
- logger .info ("All chunks copied ({} rows)" .format (self .rows ))
163
+ self . logger .info ("All chunks copied ({} rows)" .format (self .rows ))
162
164
163
165
164
166
class BigHDFTableCopy (HDFTableCopy ):
@@ -181,11 +183,13 @@ def hdf_to_pg(self, data_formatters=[cast_pandas], data_formatter_kwargs={}):
181
183
data_formatter_kwargs: list of kwargs to pass to data_formatters functions
182
184
"""
183
185
if self .hdf_tables is None :
184
- logger .warn ("No HDF table found for SQL table {}" .format (self .sql_table ))
186
+ self .logger .warn (
187
+ "No HDF table found for SQL table {}" .format (self .sql_table )
188
+ )
185
189
return
186
190
187
191
for hdf_table in self .hdf_tables :
188
- logger .info ("*** {} ***" .format (hdf_table ))
192
+ self . logger .info ("*** {} ***" .format (hdf_table ))
189
193
190
194
with pd .HDFStore (self .file_name ) as store :
191
195
nrows = store .get_storer (hdf_table ).nrows
@@ -199,26 +203,28 @@ def hdf_to_pg(self, data_formatters=[cast_pandas], data_formatter_kwargs={}):
199
203
start = 0
200
204
201
205
for i in range (n_chunks ):
202
- logger .info ("*** HDF chunk {i} of {n} ***" .format (i = i + 1 , n = n_chunks ))
203
- logger .info ("Reading HDF table" )
206
+ self .logger .info (
207
+ "*** HDF chunk {i} of {n} ***" .format (i = i + 1 , n = n_chunks )
208
+ )
209
+ self .logger .info ("Reading HDF table" )
204
210
stop = min (start + self .hdf_chunksize , nrows )
205
211
df = pd .read_hdf (self .file_name , key = hdf_table , start = start , stop = stop )
206
212
207
213
start += self .hdf_chunksize
208
214
209
215
data_formatter_kwargs ["hdf_table" ] = hdf_table
210
- logger .info ("Formatting data" )
216
+ self . logger .info ("Formatting data" )
211
217
df = self .data_formatting (
212
218
df , functions = data_formatters , ** data_formatter_kwargs
213
219
)
214
220
215
- logger .info ("Creating generator for chunking dataframe" )
216
- for chunk in df_generator (df , self .csv_chunksize ):
217
- logger .info ("Creating CSV in memory" )
221
+ self . logger .info ("Creating generator for chunking dataframe" )
222
+ for chunk in df_generator (df , self .csv_chunksize , logger = self . logger ):
223
+ self . logger .info ("Creating CSV in memory" )
218
224
fo = create_file_object (chunk )
219
225
220
- logger .info ("Copying chunk to database" )
226
+ self . logger .info ("Copying chunk to database" )
221
227
self .copy_from_file (fo )
222
228
del fo
223
229
del df
224
- logger .info ("All chunks copied ({} rows)" .format (self .rows ))
230
+ self . logger .info ("All chunks copied ({} rows)" .format (self .rows ))
0 commit comments