-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
327 lines (285 loc) · 12.3 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
import re
import time
from html import unescape
from urllib.parse import parse_qs, urlsplit

import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
# Streamlit Configuration
# Page-level settings: browser tab title and icon, wide layout, and the
# sidebar opened by default. This is the first Streamlit call in the file.
# NOTE(review): the page_icon value looks like a mis-encoded emoji — confirm
# the intended glyph.
st.set_page_config(
    page_title="WhatsApp Link Validator",
    page_icon="π",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Constants
# Base URL of WhatsApp group invite links; used both to pick invite links out
# of scraped pages and to check where a validated link finally lands.
WHATSAPP_DOMAIN = "https://chat.whatsapp.com/"
# Matches an image URL on pps.whatsapp.net whose path ends in ".jpg" and whose
# query string contains an "&" followed by at least one more character.
IMAGE_PATTERN = re.compile(r'https:\/\/pps\.whatsapp\.net\/.*\.jpg\?[^&]*&[^&]+')
# Endpoint requested by google_search().
GOOGLE_SEARCH_URL = "https://www.google.com/search"
# Custom CSS for enhanced UI
# Injected once at import time through st.markdown with unsafe_allow_html=True.
# The .main-title, .subtitle and .metric-card classes are referenced by the
# HTML snippets that main() emits; the remaining selectors target Streamlit's
# own widget markup.
st.markdown("""
<style>
.main-title {
font-size: 2.5em;
color: #25D366; /* WhatsApp Green */
text-align: center;
margin-bottom: 0;
font-weight: bold;
}
.subtitle {
font-size: 1.2em;
color: #4A4A4A; /* Dark Gray */
text-align: center;
margin-top: 0;
}
.stButton>button {
background-color: #25D366;
color: #FFFFFF;
border-radius: 8px;
font-weight: bold;
border: none;
padding: 8px 16px;
}
.stButton>button:hover {
background-color: #1EBE5A;
color: #FFFFFF;
}
.stProgress .st-bo {
background-color: #25D366;
}
.metric-card {
background-color: #F5F6F5;
padding: 12px;
border-radius: 8px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
color: #333333;
text-align: center;
}
.stTextInput, .stTextArea {
border: 1px solid #25D366;
border-radius: 5px;
}
.sidebar .sidebar-content {
background-color: #F5F6F5;
}
.stExpander {
border: 1px solid #E0E0E0;
border-radius: 5px;
}
</style>
""", unsafe_allow_html=True)
def validate_link(link):
    """Fetch a WhatsApp group invite link and report whether it looks active.

    The link is requested with browser-like headers (10 second timeout,
    redirects followed). A link counts as "Active" only when the final URL
    still contains WHATSAPP_DOMAIN and the page holds an <img> whose source
    matches IMAGE_PATTERN.

    Args:
        link: The invite URL to check.

    Returns:
        A dict with the keys "Group Name", "Group Link", "Logo URL" and
        "Status". It starts out as an "Expired" record; once the page has
        been parsed, "Group Name" is replaced by the og:title content (or
        "Unnamed Group") even if the status stays "Expired". Any exception
        is swallowed and the record is returned as built so far.
    """
    record = {
        "Group Name": "Expired",
        "Group Link": link,
        "Logo URL": "",
        "Status": "Expired",
    }
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
    }
    try:
        page = requests.get(link, headers=browser_headers, timeout=10, allow_redirects=True)
        # Only parse the page when the request ended up on the invite domain.
        if WHATSAPP_DOMAIN in page.url:
            document = BeautifulSoup(page.text, 'html.parser')

            title_tag = document.find('meta', property='og:title')
            if title_tag and title_tag.get('content'):
                record["Group Name"] = unescape(title_tag['content']).strip()
            else:
                record["Group Name"] = "Unnamed Group"

            # The first image matching IMAGE_PATTERN marks the group as active.
            for image in document.find_all('img', src=True):
                source = unescape(image['src'])
                if not IMAGE_PATTERN.match(source):
                    continue
                record["Logo URL"] = source
                record["Status"] = "Active"
                break
    except Exception:
        # Best effort: any failure leaves the record in whatever state it
        # had reached.
        pass
    return record
def scrape_whatsapp_links(url):
    """Collect WhatsApp group invite links found on a webpage.

    Args:
        url: Address of the page to download (10 second timeout).

    Returns:
        The href of every <a> tag whose href starts with WHATSAPP_DOMAIN, in
        document order and with duplicates kept. An empty list is returned
        when anything goes wrong while fetching or parsing.
    """
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        page = requests.get(url, headers=browser_headers, timeout=10)
        document = BeautifulSoup(page.text, 'html.parser')
        invite_links = []
        for anchor in document.find_all('a', href=True):
            target = anchor['href']
            if target.startswith(WHATSAPP_DOMAIN):
                invite_links.append(target)
        return invite_links
    except Exception:
        # Best effort: an unreachable or unparsable page yields no links.
        return []
def _google_redirect_target(href):
    """Return the decoded destination of a Google ``/url?q=`` link, else None."""
    if not href.startswith('/url?q='):
        return None
    # parse_qs percent-decodes the value, so a destination that carries its
    # own query string ("%3F", "%3D", "%26") comes back as a usable URL.
    targets = parse_qs(urlsplit(href).query).get('q')
    return targets[0] if targets else None


def google_search(query, num_pages):
    """Custom Google search to fetch URLs from multiple pages.

    Requests up to ``num_pages`` result pages (10 results per page) and
    extracts the destination of every ``/url?q=`` redirect link.

    Args:
        query: The search string sent as the ``q`` parameter.
        num_pages: Number of result pages to request; zero or a negative
            number performs no request.

    Returns:
        A list of unique, percent-decoded result URLs in the order they were
        first seen. If a page fails, the error is shown with ``st.error``,
        paging stops, and the URLs gathered so far are returned.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    # A dict is used as an insertion-ordered set so that duplicates are
    # dropped without shuffling the results.
    found = {}
    for page in range(num_pages):
        if page:
            time.sleep(2)  # Pause between requests to avoid rate-limiting
        params = {
            "q": query,
            "start": page * 10,  # Google pagination: 10 results per page
        }
        try:
            response = requests.get(GOOGLE_SEARCH_URL, headers=headers, params=params, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Extract URLs from search results
            for a in soup.find_all('a', href=True):
                target = _google_redirect_target(a['href'])
                if target:
                    found.setdefault(target)
        except Exception as e:
            # Report the failure in the UI and keep what was already found.
            st.error(f"Error on page {page + 1}: {e}")
            break
    return list(found)
def load_links(uploaded_file):
    """Load WhatsApp group links from an uploaded TXT or CSV file.

    Args:
        uploaded_file: A binary file-like object with a ``name`` attribute,
            such as the object returned by ``st.file_uploader``. A name
            ending in ".csv" (any letter case) is parsed as CSV; anything
            else is read as UTF-8 text with one link per line.

    Returns:
        A list of non-empty, whitespace-stripped strings. For CSV input the
        first row is treated as a header and the values come from the first
        column, with missing cells skipped. An empty CSV yields ``[]``.

    Raises:
        UnicodeDecodeError: If a text file is not valid UTF-8.
    """
    # Rewind so the whole file is read even if the buffer was consumed by an
    # earlier call.
    uploaded_file.seek(0)
    if uploaded_file.name.lower().endswith('.csv'):
        try:
            frame = pd.read_csv(uploaded_file)
        except pd.errors.EmptyDataError:
            return []
        if frame.shape[1] == 0:
            return []
        candidates = frame.iloc[:, 0].dropna().astype(str)
    else:
        # "utf-8-sig" drops a leading byte-order mark that would otherwise be
        # glued to the first link.
        candidates = uploaded_file.read().decode('utf-8-sig').splitlines()
    stripped = (candidate.strip() for candidate in candidates)
    return [link for link in stripped if link]
def _collect_google_links(keyword, num_pages):
    """Search Google for *keyword* and scrape the result pages for invite links.

    Returns the unique invite links in first-seen order. When the query is
    empty, the search finds nothing, or no page contains an invite link, a
    message is shown and an empty list is returned.
    """
    if not keyword:
        st.warning("Please enter a search query.")
        return []
    with st.spinner("Searching Google..."):
        search_results = google_search(keyword, num_pages)
    if not search_results:
        st.info("No search results found for the query.")
        return []
    st.success(f"Found {len(search_results)} webpages. Scraping WhatsApp links...")
    # Scrape links with progress
    all_links = []
    progress_bar = st.progress(0)
    for idx, url in enumerate(search_results, start=1):
        all_links.extend(scrape_whatsapp_links(url))
        progress_bar.progress(idx / len(search_results))
    # dict.fromkeys removes duplicates while keeping a stable order.
    unique_links = list(dict.fromkeys(all_links))
    if not unique_links:
        st.warning("No WhatsApp group links found in the scraped webpages.")
        return []
    st.success(f"Scraped {len(unique_links)} unique WhatsApp group links. Validating...")
    return unique_links


def _validate_links_with_progress(links):
    """Run validate_link() on every link while updating a progress display.

    Returns the list of result dicts in the same order as *links*.
    """
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(links)
    validated = []
    for done, link in enumerate(links, start=1):
        validated.append(validate_link(link))
        progress_bar.progress(done / total)
        status_text.text(f"Validated {done}/{total} links")
    return validated


def _render_results(results):
    """Show summary metrics, a filterable table and CSV downloads for *results*."""
    df = pd.DataFrame(results)
    active_df = df[df['Status'] == 'Active']
    expired_df = df[df['Status'] == 'Expired']
    # Summary Metrics
    st.subheader("π Results Summary")
    metrics = [
        ("Total Links", len(df), "Total links processed"),
        ("Active Links", len(active_df), "Valid WhatsApp groups"),
        ("Expired Links", len(expired_df), "Invalid or expired links"),
    ]
    for column, (label, value, tip) in zip(st.columns(3), metrics):
        with column:
            st.markdown('<div class="metric-card">', unsafe_allow_html=True)
            st.metric(label, value, help=tip)
            st.markdown('</div>', unsafe_allow_html=True)
    # Filter and Display Results
    with st.expander("π View and Filter Results", expanded=True):
        status_filter = st.multiselect("Filter by Status", options=["Active", "Expired"], default=["Active"], help="Select statuses to display")
        # An empty selection shows every row instead of an empty table.
        filtered_df = df[df['Status'].isin(status_filter)] if status_filter else df
        st.dataframe(
            filtered_df,
            column_config={
                "Group Link": st.column_config.LinkColumn("Invite Link", display_text="Join Group", help="Click to join"),
                "Logo URL": st.column_config.LinkColumn("Logo", help="Click to view logo")
            },
            height=400,
            use_container_width=True
        )
    # Download Buttons
    col_dl1, col_dl2 = st.columns(2)
    with col_dl1:
        st.download_button(
            "π₯ Download Active Groups",
            active_df.to_csv(index=False),
            "active_groups.csv",
            "text/csv",
            use_container_width=True
        )
    with col_dl2:
        st.download_button(
            "π₯ Download All Results",
            df.to_csv(index=False),
            "all_groups.csv",
            "text/csv",
            use_container_width=True
        )


def main():
    """Main function for the WhatsApp Group Validator app.

    Draws the page on every Streamlit rerun: the sidebar input-method
    selector, a "Clear Results" button, the input section for the chosen
    method, and the results section. Fresh validation results are stored
    under ``st.session_state['results']`` so they stay visible across
    reruns. An input warning (empty query, no links, ...) no longer aborts
    the run, so results stored earlier are still displayed.
    """
    # NOTE(review): several labels below contain what looks like mis-encoded
    # emoji (e.g. "π"). They are reproduced verbatim because the intended
    # glyphs cannot be recovered from this file — confirm and restore.
    st.markdown('<h1 class="main-title">WhatsApp Group Validator π</h1>', unsafe_allow_html=True)
    st.markdown('<p class="subtitle">Search, scrape, or validate WhatsApp group links with ease</p>', unsafe_allow_html=True)
    # Sidebar for settings
    num_pages = 3  # Same as the slider default; only used by the Google method.
    with st.sidebar:
        st.header("βοΈ Settings")
        st.markdown("Customize your experience")
        input_method = st.selectbox("Input Method", ["Search and Scrape from Google", "Enter Links Manually", "Upload File (TXT/CSV)"], help="Choose how to input links")
        if input_method == "Search and Scrape from Google":
            num_pages = st.slider("Google Pages to Scrape", min_value=1, max_value=5, value=3, help="More pages may increase scraping time")
    # Clear Results Button
    if st.button("ποΈ Clear Results", use_container_width=True):
        if 'results' in st.session_state:
            del st.session_state['results']
        st.success("Results cleared successfully!")
    # Input Section
    with st.container():
        links = []
        if input_method == "Search and Scrape from Google":
            st.subheader("π Google Search & Scrape")
            keyword = st.text_input("Search Query:", placeholder="e.g., 'whatsapp group links site:*.com -inurl:(login)'", help="Refine with site: or -inurl:")
            if st.button("Search, Scrape, and Validate", use_container_width=True):
                links = _collect_google_links(keyword, num_pages)
        elif input_method == "Enter Links Manually":
            st.subheader("π Manual Link Entry")
            links_text = st.text_area("Enter WhatsApp Links (one per line):", height=200, placeholder="e.g., https://chat.whatsapp.com/ABC123")
            if st.button("Validate Links", use_container_width=True):
                links = [line.strip() for line in links_text.split('\n') if line.strip()]
                if not links:
                    st.warning("Please enter at least one link.")
        elif input_method == "Upload File (TXT/CSV)":
            st.subheader("π₯ File Upload")
            uploaded_file = st.file_uploader("Upload TXT or CSV", type=["txt", "csv"], help="One link per line or in first column")
            if uploaded_file and st.button("Validate File Links", use_container_width=True):
                links = load_links(uploaded_file)
                if not links:
                    st.warning("No links found in the uploaded file.")
        # Store results in session state
        if links:
            st.session_state['results'] = _validate_links_with_progress(links)
    # Results Section
    if 'results' in st.session_state:
        _render_results(st.session_state['results'])
    else:
        # The icon literal was mis-encoded in the source and has been
        # restored to the information emoji.
        st.info("Select an input method and start validating WhatsApp group links!", icon="ℹ️")
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()