43
43
*/
44
44
class BuildQueueCommand extends Command
45
45
{
46
/** Converter used to decode queue parameters and crawl results. */
private readonly JsonCompatibilityConverter $jsonCompatibilityConverter;

/** Dispatcher used to emit the queue-change event. */
private readonly EventDispatcher $eventDispatcher;

/** Repository used to clean up old queue entries. */
private readonly QueueRepository $queueRepository;

/** Repository used to check that the requested page exists. */
private readonly PageRepository $pageRepository;

/** Controller that builds the URL list and executes queue entries. */
private readonly CrawlerController $crawlerController;

/**
 * Stores the injected collaborators and initialises the base command.
 *
 * @param JsonCompatibilityConverter $jsonCompatibilityConverter
 * @param EventDispatcher $eventDispatcher
 * @param QueueRepository $queueRepository
 * @param PageRepository $pageRepository
 * @param CrawlerController $crawlerController
 */
public function __construct(
    JsonCompatibilityConverter $jsonCompatibilityConverter,
    EventDispatcher $eventDispatcher,
    QueueRepository $queueRepository,
    PageRepository $pageRepository,
    CrawlerController $crawlerController,
) {
    $this->jsonCompatibilityConverter = $jsonCompatibilityConverter;
    $this->eventDispatcher = $eventDispatcher;
    $this->queueRepository = $queueRepository;
    $this->pageRepository = $pageRepository;
    $this->crawlerController = $crawlerController;

    // No explicit command name is passed to the parent constructor.
    parent::__construct();
}
55
+
46
56
protected function configure (): void
47
57
{
48
58
$ this ->setDescription ('Create entries in the queue that can be processed at once ' );
@@ -110,27 +120,21 @@ protected function configure(): void
110
120
*/
111
121
protected function execute (InputInterface $ input , OutputInterface $ output ): int
112
122
{
113
- /** @var JsonCompatibilityConverter $jsonCompatibilityConverter */
114
- $ jsonCompatibilityConverter = GeneralUtility::makeInstance (JsonCompatibilityConverter::class);
115
123
$ mode = $ input ->getOption ('mode ' ) ?? 'queue ' ;
116
124
117
125
$ extensionSettings = GeneralUtility::makeInstance (
118
126
ExtensionConfigurationProvider::class
119
127
)->getExtensionConfiguration ();
120
- $ eventDispatcher = GeneralUtility::makeInstance (EventDispatcher::class);
121
128
122
129
/** @var CrawlerController $crawlerController */
123
130
$ crawlerController = GeneralUtility::makeInstance (CrawlerController::class);
124
- /** @var QueueRepository $queueRepository */
125
- $ queueRepository = GeneralUtility::makeInstance (QueueRepository::class);
126
- $ pageRepository = GeneralUtility::makeInstance (PageRepository::class);
127
131
128
132
if ($ mode === 'exec ' ) {
129
133
$ crawlerController ->registerQueueEntriesInternallyOnly = true ;
130
134
}
131
135
132
136
$ pageId = MathUtility::forceIntegerInRange ((int ) $ input ->getArgument ('page ' ), 0 );
133
- if ($ pageId === 0 || empty ($ pageRepository ->getPage ($ pageId ))) {
137
+ if ($ pageId === 0 || empty ($ this -> pageRepository ->getPage ($ pageId ))) {
134
138
$ message = "Page {$ pageId } is not a valid page, please check you root page id and try again. " ;
135
139
MessageUtility::addErrorMessage ($ message );
136
140
$ output ->writeln ("<info> {$ message }</info> " );
@@ -143,85 +147,24 @@ protected function execute(InputInterface $input, OutputInterface $output): int
143
147
$ reason = new Reason ();
144
148
$ reason ->setReason (Reason::REASON_CLI_SUBMIT );
145
149
$ reason ->setDetailText ('The cli script of the crawler added to the queue ' );
146
- $ eventDispatcher ->dispatch (new InvokeQueueChangeEvent ($ reason ));
150
+ $ this -> eventDispatcher ->dispatch (new InvokeQueueChangeEvent ($ reason ));
147
151
}
148
152
149
153
if ($ extensionSettings ['cleanUpOldQueueEntries ' ]) {
150
- $ queueRepository ->cleanUpOldQueueEntries ();
154
+ $ this -> queueRepository ->cleanUpOldQueueEntries ();
151
155
}
152
156
153
- $ crawlerController ->setID = GeneralUtility::md5int (microtime ());
154
- $ queueRows = $ crawlerController ->getPageTreeAndUrls (
155
- $ pageId ,
156
- MathUtility::forceIntegerInRange ((int ) $ input ->getOption ('depth ' ), 0 , 99 ),
157
- $ crawlerController ->getCurrentTime (),
158
- MathUtility::forceIntegerInRange ((int ) $ input ->getOption ('number ' ) ?: 30 , 1 , 1000 ),
159
- $ mode === 'queue ' || $ mode === 'exec ' ,
160
- $ mode === 'url ' ,
161
- [],
162
- $ configurationKeys
163
- );
157
+ $ this ->crawlerController ->setID = GeneralUtility::md5int (microtime ());
158
+ $ queueRows = $ this ->getQueueRows ($ pageId , $ input , $ mode , $ configurationKeys );
164
159
165
- // Consider a swith/match statement here, and extract the code in between.
166
- if ($ mode === 'url ' ) {
167
- $ output ->writeln ('<info> ' . implode (PHP_EOL , $ crawlerController ->downloadUrls ) . PHP_EOL . '</info> ' );
168
- } elseif ($ mode === 'exec ' ) {
169
- $ progressBar = new ProgressBar ($ output );
170
- $ output ->writeln ('<info>Executing ' . count ($ crawlerController ->urlList ) . ' requests right away:</info> ' );
171
- $ this ->outputUrls ($ queueRows , $ output );
172
- $ output ->writeln ('<info>Processing</info> ' . PHP_EOL );
173
-
174
- foreach ($ progressBar ->iterate ($ crawlerController ->queueEntries ) as $ queueRec ) {
175
- $ p = $ jsonCompatibilityConverter ->convert ($ queueRec ['parameters ' ]);
176
- if (is_bool ($ p )) {
177
- continue ;
178
- }
179
-
180
- $ progressBar ->clear ();
181
- if (empty ($ p ['procInstructions ' ][0 ])) {
182
- $ procInstructionsString = '' ;
183
- } else {
184
- $ procInstructionsString = ' ( ' . implode (', ' , $ p ['procInstructions ' ]) . ') ' ;
185
- }
186
- $ output ->writeln ('<info> ' . $ p ['url ' ] . $ procInstructionsString . ' => ' . '</info> ' );
187
- $ progressBar ->display ();
188
-
189
- $ result = $ crawlerController ->readUrlFromArray ($ queueRec );
190
-
191
- $ resultContent = $ result ['content ' ] ?? '' ;
192
- $ requestResult = $ jsonCompatibilityConverter ->convert ($ resultContent );
193
-
194
- $ progressBar ->clear ();
195
- if (is_array ($ requestResult )) {
196
- $ resLog = array_key_exists ('log ' , $ requestResult )
197
- && is_array ($ requestResult ['log ' ]) ? chr (9 ) . chr (9 ) .
198
- implode (PHP_EOL . chr (9 ) . chr (9 ), $ requestResult ['log ' ]) : '' ;
199
- $ output ->writeln ('<info>OK: ' . $ resLog . '</info> ' . PHP_EOL );
200
- } else {
201
- $ output ->writeln (
202
- '<error>Error checking Crawler Result: ' . substr (
203
- (string ) preg_replace ('/\s+/ ' , ' ' , strip_tags ((string ) $ resultContent )),
204
- 0 ,
205
- 30000
206
- ) . '... ' . PHP_EOL . '</error> ' . PHP_EOL
207
- );
208
- }
209
- $ progressBar ->display ();
210
- }
211
- $ output ->writeln ('' );
212
- } elseif ($ mode === 'queue ' ) {
213
- $ output ->writeln (
214
- '<info>Putting ' . count ($ crawlerController ->urlList ) . ' entries in queue:</info> ' . PHP_EOL
215
- );
216
- $ this ->outputUrls ($ queueRows , $ output );
217
- } else {
218
- $ output ->writeln (
219
- '<info> ' . count (
220
- $ crawlerController ->urlList
221
- ) . ' entries found for processing. (Use "mode" to decide action):</info> ' . PHP_EOL
222
- );
223
- $ this ->outputUrls ($ queueRows , $ output );
224
- }
160
+ match ($ mode ) {
161
+ 'url ' => $ output ->writeln (
162
+ '<info> ' . implode (PHP_EOL , $ this ->crawlerController ->downloadUrls ) . PHP_EOL . '</info> '
163
+ ),
164
+ 'exec ' => $ this ->outputModeExec ($ output , $ queueRows ),
165
+ 'queue ' => $ this ->outputModeQueue ($ output , $ queueRows ),
166
+ default => $ this ->outputModeDefault ($ output , $ queueRows ),
167
+ };
225
168
226
169
return Command::SUCCESS ;
227
170
}
@@ -248,4 +191,85 @@ private function outputUrls(array $queueRows, OutputInterface $output): void
248
191
}
249
192
}
250
193
}
194
+
195
/**
 * Output handler used as the `default` arm of the mode `match` in execute().
 *
 * Writes one summary line containing the number of entries in
 * $this->crawlerController->urlList, followed by PHP_EOL, and then hands
 * the queue rows to outputUrls() for printing.
 *
 * @param OutputInterface $output    Console output to write to.
 * @param array           $queueRows Rows passed unchanged to outputUrls().
 */
+ private function outputModeDefault (OutputInterface $ output , array $ queueRows ): void
196
+ {
197
+ $ output ->writeln (
198
+ '<info> ' . count (
199
+ $ this ->crawlerController ->urlList
200
+ ) . ' entries found for processing. (Use "mode" to decide action):</info> ' . PHP_EOL
201
+ );
202
+ $ this ->outputUrls ($ queueRows , $ output );
203
+ }
204
+
205
/**
 * Output handler for the 'queue' arm of the mode `match` in execute().
 *
 * Writes a headline stating how many entries
 * ($this->crawlerController->urlList) are being put in the queue,
 * followed by PHP_EOL, and then hands the queue rows to outputUrls().
 * This method only reports; it does not itself add anything to the queue.
 *
 * @param OutputInterface $output    Console output to write to.
 * @param array           $queueRows Rows passed unchanged to outputUrls().
 */
+ private function outputModeQueue (OutputInterface $ output , array $ queueRows ): void
206
+ {
207
+ $ output ->writeln (
208
+ '<info>Putting ' . count ($ this ->crawlerController ->urlList ) . ' entries in queue:</info> ' . PHP_EOL
209
+ );
210
+ $ this ->outputUrls ($ queueRows , $ output );
211
+ }
212
+
213
/**
 * Output handler for the 'exec' arm of the mode `match` in execute().
 *
 * Prints a headline with the number of entries in
 * $this->crawlerController->urlList, lists the queue rows via outputUrls(),
 * and then walks $this->crawlerController->queueEntries under a progress
 * bar. Each entry is passed to CrawlerController::readUrlFromArray() and the
 * outcome is reported as an "OK" line or an error line.
 *
 * @param OutputInterface $output    Console output to write to.
 * @param array           $queueRows Rows passed unchanged to outputUrls().
 */
+ private function outputModeExec (OutputInterface $ output , array $ queueRows ): void
214
+ {
215
+ $ progressBar = new ProgressBar ($ output );
216
+ $ output ->writeln (
217
+ '<info>Executing ' . count ($ this ->crawlerController ->urlList ) . ' requests right away:</info> '
218
+ );
219
+ $ this ->outputUrls ($ queueRows , $ output );
220
+ $ output ->writeln ('<info>Processing</info> ' . PHP_EOL );
221
+
222
+ foreach ($ progressBar ->iterate ($ this ->crawlerController ->queueEntries ) as $ queueRec ) {
223
// Decode the entry's stored parameters.
+ $ p = $ this ->jsonCompatibilityConverter ->convert ($ queueRec ['parameters ' ]);
224
// NOTE(review): a boolean result presumably means the parameters could not be decoded - confirm against JsonCompatibilityConverter. Such entries are skipped without any output.
+ if (is_bool ($ p )) {
225
+ continue ;
226
+ }
227
+
228
// The bar is cleared before each message and displayed again afterwards, so the message lines are written while the bar is not shown.
+ $ progressBar ->clear ();
229
// Append the processing instructions in parentheses only when the first one is non-empty.
+ if (empty ($ p ['procInstructions ' ][0 ])) {
230
+ $ procInstructionsString = '' ;
231
+ } else {
232
+ $ procInstructionsString = ' ( ' . implode (', ' , $ p ['procInstructions ' ]) . ') ' ;
233
+ }
234
+ $ output ->writeln ('<info> ' . $ p ['url ' ] . $ procInstructionsString . ' => ' . '</info> ' );
235
+ $ progressBar ->display ();
236
+
237
// Process this queue entry through the crawler controller.
+ $ result = $ this ->crawlerController ->readUrlFromArray ($ queueRec );
238
+
239
// A missing 'content' key is treated as an empty string.
+ $ resultContent = $ result ['content ' ] ?? '' ;
240
+ $ requestResult = $ this ->jsonCompatibilityConverter ->convert ($ resultContent );
241
+
242
+ $ progressBar ->clear ();
243
// An array result is reported as OK; anything else is reported as an error.
+ if (is_array ($ requestResult )) {
244
// When a 'log' array is present, its entries are joined one per line, each prefixed with two chr(9) characters; otherwise the log text is empty.
+ $ resLog = array_key_exists ('log ' , $ requestResult )
245
+ && is_array ($ requestResult ['log ' ]) ? chr (9 ) . chr (9 ) .
246
+ implode (PHP_EOL . chr (9 ) . chr (9 ), $ requestResult ['log ' ]) : '' ;
247
+ $ output ->writeln ('<info>OK: ' . $ resLog . '</info> ' . PHP_EOL );
248
+ } else {
249
// Error text is the raw content with tags stripped and whitespace runs collapsed, cut to at most 30000 characters; the trailing dots are appended whether or not anything was cut.
+ $ output ->writeln (
250
+ '<error>Error checking Crawler Result: ' . substr (
251
+ (string ) preg_replace ('/\s+/ ' , ' ' , strip_tags ((string ) $ resultContent )),
252
+ 0 ,
253
+ 30000
254
+ ) . '... ' . PHP_EOL . '</error> ' . PHP_EOL
255
+ );
256
+ }
257
+ $ progressBar ->display ();
258
+ }
259
// Final empty line written after the loop.
+ $ output ->writeln ('' );
260
+ }
261
+
262
/**
 * Builds the queue rows by delegating to CrawlerController::getPageTreeAndUrls().
 *
 * Arguments passed to the controller, in order:
 *  1. $pageId as given.
 *  2. The 'depth' option cast to int and passed through
 *     MathUtility::forceIntegerInRange() with bounds 0 and 99.
 *  3. The controller's own getCurrentTime() value.
 *  4. The 'number' option cast to int, replaced by 30 when that cast is
 *     falsy (0), then passed through forceIntegerInRange() with bounds 1
 *     and 1000.
 *  5. true when $mode is 'queue' or 'exec'.
 *  6. true when $mode is 'url'.
 *  7. An empty array.
 *  8. $configurationKeys as given.
 * NOTE(review): the meaning of each positional argument is defined by
 * getPageTreeAndUrls(), which is not visible here - confirm there.
 *
 * @param int            $pageId            Page id handed to the controller.
 * @param InputInterface $input             Source of the 'depth' and 'number' options.
 * @param mixed          $mode              Mode value, compared strictly against string literals.
 * @param array          $configurationKeys Passed unchanged to the controller.
 *
 * @return array Whatever getPageTreeAndUrls() returns.
 */
+ private function getQueueRows (int $ pageId , InputInterface $ input , mixed $ mode , array $ configurationKeys ): array
263
+ {
264
+ return $ this ->crawlerController ->getPageTreeAndUrls (
265
+ $ pageId ,
266
+ MathUtility::forceIntegerInRange ((int ) $ input ->getOption ('depth ' ), 0 , 99 ),
267
+ $ this ->crawlerController ->getCurrentTime (),
268
+ MathUtility::forceIntegerInRange ((int ) $ input ->getOption ('number ' ) ?: 30 , 1 , 1000 ),
269
+ $ mode === 'queue ' || $ mode === 'exec ' ,
270
+ $ mode === 'url ' ,
271
+ [],
272
+ $ configurationKeys
273
+ );
274
+ }
251
275
}
0 commit comments