Skip to content

Commit 202b9ff

Browse files
committed
feat: Ignore quotes and double quotes in filtered words and categories
1 parent 6a7582d commit 202b9ff

File tree

6 files changed

+75
-12
lines changed

6 files changed

+75
-12
lines changed

example/uv.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "django-rss-filter"
3-
version = "0.10.1"
3+
version = "0.11.0"
44
description = "Filter public RSS feeds, remove articles that contain certain keywords or categories."
55
authors = [
66
{name = "Kevin Renskers", email = "[email protected]"},

rssfilter/utils.py

+18-8
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,26 @@ def validate_feed(feed_url: str) -> FeedValidationSuccess | FeedValidationError:
2929
# that validating and then fetching the feed is done in a consistent
3030
# manner.
3131
r = httpx.get(feed_url, follow_redirects=True, timeout=2, headers={"User-Agent": USER_AGENT})
32-
33-
feed = feedparser.parse(r.text)
34-
version = feed.get("version", "")
35-
if not version:
32+
feed = validate_feed_body(r.text)
33+
if not feed:
3634
return FeedValidationError(False, "This doesn't seem to be a valid RSS or Atom feed")
3735
return FeedValidationSuccess(True, feed)
38-
except ValueError:
39-
return FeedValidationError(False, "This doesn't seem to be a valid RSS or Atom feed")
4036
except ConnectTimeout:
4137
return FeedValidationError(False, "Couldn't load the URL due to a connection timeout")
4238
except ConnectError:
4339
return FeedValidationError(False, "Couldn't load the URL due to a connection error")
4440

4541

42+
def validate_feed_body(body: str) -> Literal[False] | FeedParserDict:
    """Parse ``body`` with feedparser and return the result if it looks like a feed.

    A body counts as a feed when the parsed result reports a non-empty
    ``version``. In every other case, including an ``AttributeError`` or
    ``ValueError`` raised while parsing or reading the version, ``False`` is
    returned instead of raising.
    """
    try:
        parsed = feedparser.parse(body)
        has_version = bool(parsed.version)
    except (AttributeError, ValueError):
        # NOTE(review): presumably AttributeError covers a parse result with no
        # "version" entry at all - confirm against feedparser's behavior.
        return False

    if has_version:
        return parsed
    return False
50+
51+
4652
def filter_feed(feed_body: str, filtered_words: str, filtered_categories: str) -> str:
4753
feed = feedparser.parse(feed_body)
4854

@@ -59,8 +65,12 @@ def filter_feed(feed_body: str, filtered_words: str, filtered_categories: str) -
5965
if published := feed.feed.get("published"):
6066
fg.pubDate(published)
6167

62-
filtered_words_list = [item.strip().lower() for item in filtered_words.split(",") if item.strip()]
63-
filtered_categories_list = [item.strip().lower() for item in filtered_categories.split(",") if item.strip()]
68+
filtered_words_list = [item.lower().strip().strip('"').strip("'") for item in filtered_words.split(",")]
69+
filtered_categories_list = [item.lower().strip().strip('"').strip("'") for item in filtered_categories.split(",")]
70+
71+
# Filter out empty strings
72+
filtered_words_list = [x for x in filtered_words_list if x]
73+
filtered_categories_list = [x for x in filtered_categories_list if x]
6474

6575
for entry in feed.entries:
6676
# Check if the title contains filtered words

tests/test_models.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from django.test import TestCase
2+
from django.utils import timezone
3+
4+
from rssfilter.models import FilteredFeed
5+
6+
7+
class ModelsTest(TestCase):
    """Tests for the ``FilteredFeed`` model."""

    def test_clear_cache_on_save(self):
        """Saving a feed after changing its filtered words resets the cache fields."""
        feed = FilteredFeed.objects.create(
            feed_url="http://www.example.com/",
            cache_date=timezone.now(),
            filtered_feed_body="CACHED",
        )

        # Sanity check: the cached body is in place before the change.
        self.assertEqual(feed.filtered_feed_body, "CACHED")

        feed.filtered_words = "Changed value"
        feed.save()

        # After saving, both cache fields are expected to be reset.
        self.assertIsNone(feed.cache_date)
        self.assertEqual(feed.filtered_feed_body, "")

tests/test_utils.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import feedparser
22
from django.test import TestCase
33

4-
from rssfilter.utils import filter_feed
4+
from rssfilter.utils import filter_feed, validate_feed_body
55

66
body = """
77
<?xml version="1.0" encoding="UTF-8"?>
@@ -35,6 +35,18 @@
3535

3636

3737
class UtilsTest(TestCase):
38+
def test_validate_feed_body_valid(self):
    """The module-level sample feed ``body`` validates to a truthy result."""
    result = validate_feed_body(body)
    self.assertTrue(result)
41+
42+
def test_validate_feed_body_invalid(self):
    """A plain HTML document is rejected as a feed body."""
    self.assertFalse(validate_feed_body("<html>Hello</html>"))
45+
46+
def test_validate_feed_body_empty(self):
    """An empty string is rejected as a feed body."""
    self.assertFalse(validate_feed_body(""))
49+
3850
def test_filter_words(self):
3951
filtered_feed_body = filter_feed(body, filtered_words="One", filtered_categories="")
4052
filtered_feed = feedparser.parse(filtered_feed_body)
@@ -55,6 +67,12 @@ def test_filter_words_empty(self):
5567

5668
self.assertEqual(len(filtered_feed.entries), 2)
5769

70+
def test_filter_words_with_empty_quotes(self):
    """A filter made up of only a pair of double quotes leaves two entries."""
    output = filter_feed(body, filtered_words='""', filtered_categories="")
    remaining = feedparser.parse(output).entries

    self.assertEqual(len(remaining), 2)
75+
5876
def test_filter_words_not_found(self):
5977
filtered_feed_body = filter_feed(body, filtered_words="Foo", filtered_categories="")
6078
filtered_feed = feedparser.parse(filtered_feed_body)
@@ -68,6 +86,20 @@ def test_filter_words_comma_seperated(self):
6886
self.assertEqual(len(filtered_feed.entries), 1)
6987
self.assertEqual(filtered_feed.entries[0].title, "Article Two")
7088

89+
def test_filter_words_with_quotes(self):
    """Single-quoted filter words still match: only "Article Two" remains."""
    output = filter_feed(body, filtered_words="'Foo', 'Bar', 'One'", filtered_categories="")
    remaining = feedparser.parse(output).entries

    self.assertEqual(len(remaining), 1)
    self.assertEqual(remaining[0].title, "Article Two")
95+
96+
def test_filter_words_with_double_quotes(self):
    """Double-quoted filter words still match: only "Article Two" remains."""
    output = filter_feed(body, filtered_words='"Foo", "Bar", "One"', filtered_categories="")
    remaining = feedparser.parse(output).entries

    self.assertEqual(len(remaining), 1)
    self.assertEqual(remaining[0].title, "Article Two")
102+
71103
def test_filter_categories(self):
72104
filtered_feed_body = filter_feed(body, filtered_words="", filtered_categories="Category Four")
73105
filtered_feed = feedparser.parse(filtered_feed_body)

uv.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)