-
Notifications
You must be signed in to change notification settings - Fork 3.7k
/
Copy pathhello_world.py
50 lines (46 loc) · 1.46 KB
/
hello_world.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import asyncio
from crawl4ai import (
AsyncWebCrawler,
BrowserConfig,
CrawlerRunConfig,
CacheMode,
DefaultMarkdownGenerator,
PruningContentFilter,
CrawlResult
)
async def example_cdp():
browser_conf = BrowserConfig(
headless=False,
cdp_url="http://localhost:9223"
)
crawler_config = CrawlerRunConfig(
session_id="test",
js_code = """(() => { return {"result": "Hello World!"} })()""",
js_only=True
)
async with AsyncWebCrawler(
config=browser_conf,
verbose=True,
) as crawler:
result : CrawlResult = await crawler.arun(
url="https://www.helloworld.org",
config=crawler_config,
)
print(result.js_execution_result)
async def main():
browser_config = BrowserConfig(headless=False, verbose=True)
async with AsyncWebCrawler(config=browser_config) as crawler:
crawler_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
markdown_generator=DefaultMarkdownGenerator(
content_filter=PruningContentFilter(
threshold=0.48, threshold_type="fixed", min_word_threshold=0
)
),
)
result : CrawlResult = await crawler.arun(
url="https://www.helloworld.org", config=crawler_config
)
print(result.markdown.raw_markdown[:500])
if __name__ == "__main__":
asyncio.run(main())