Skip to content

Commit b24b9ee

Browse files
committed
Add comments to Reddit parser
1 parent 299f1d6 commit b24b9ee

File tree

2 files changed

+29
-2
lines changed

2 files changed

+29
-2
lines changed

src/extractors/custom/www.reddit.com/index.js

+7
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,11 @@ export const WwwRedditComExtractor = {
5959
'div a[data-test-id="comments-page-link-num-comments"]',
6060
],
6161
},
62+
63+
extend: {
64+
comments: {
65+
selectors: ['div[data-testid="comment"]'],
66+
allowMultiple: true,
67+
},
68+
},
6269
};

src/extractors/custom/www.reddit.com/index.test.js

+22-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import assert from 'assert';
2-
import URL from 'url';
32
import cheerio from 'cheerio';
43
import moment from 'moment-timezone';
4+
import URL from 'url';
55

6-
import Mercury from 'mercury';
76
import getExtractor from 'extractors/get-extractor';
7+
import Mercury from 'mercury';
88
import { excerptContent } from 'utils/text';
99

1010
const fs = require('fs');
@@ -224,5 +224,25 @@ describe('WwwRedditComExtractor', () => {
224224

225225
assert.equal(embed.length, 1);
226226
});
227+
228+
it('returns the comments', async () => {
229+
const html = fs.readFileSync(
230+
'./fixtures/www.reddit.com--title-only.html'
231+
);
232+
const uri =
233+
'https://www.reddit.com/r/AskReddit/comments/axtih6/what_is_the_most_worth_it_item_you_have_ever/';
234+
235+
const { comments } = await Mercury.parse(uri, { html });
236+
237+
const expectedCommentsLength = 12;
238+
assert.equal(comments.length, expectedCommentsLength);
239+
240+
const first13 = excerptContent(comments[0] || '', 13);
241+
242+
assert.equal(
243+
first13,
244+
'A Miele canister vacuum. I had read the 4 AMAs from the vacuum'
245+
);
246+
});
227247
});
228248
});

0 commit comments

Comments
 (0)