Skip to content

Commit 28049a6

Browse files
author
Gal Ben David
committed
added search of multiple strings. updated dependencies. fixed linting errors
1 parent 9546c6c commit 28049a6

14 files changed

+347
-190
lines changed

.github/workflows/build.yml

+18-8
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
name: Build
2-
on: [push, pull_request]
2+
on:
3+
- push
4+
- pull_request
35
jobs:
46
lint:
57
if: github.event_name == 'push' && !startsWith(github.event.ref, 'refs/tags')
68
runs-on: ubuntu-latest
79
steps:
810
- name: Checkout
9-
uses: actions/checkout@v2
11+
uses: actions/checkout@v3
1012
- name: Install latest rust
1113
uses: actions-rs/toolchain@v1
1214
with:
1315
toolchain: stable
16+
profile: minimal
1417
override: true
1518
components: clippy
1619
- name: Lint with clippy
@@ -24,17 +27,24 @@ jobs:
2427
strategy:
2528
fail-fast: false
2629
matrix:
27-
python-version: ['3.7', '3.8', '3.9', '3.10']
28-
os: [ubuntu-latest , macos-latest, windows-latest]
30+
python-version:
31+
- '3.7'
32+
- '3.8'
33+
- '3.9'
34+
- '3.10'
35+
os:
36+
- ubuntu-latest
37+
- macos-latest
38+
- windows-latest
2939
steps:
3040
- name: Checkout
31-
uses: actions/checkout@v2
41+
uses: actions/checkout@v3
3242
- name: Set up Python ${{ matrix.python-version }}
33-
uses: actions/setup-python@v2
43+
uses: actions/setup-python@v3
3444
with:
3545
python-version: ${{ matrix.python-version }}
36-
- name: Run image
37-
uses: abatilo/actions-poetry@v2.0.0
46+
- name: Install Poetry
47+
uses: abatilo/actions-poetry@v2.1.3
3848
- name: Install Rust
3949
uses: actions-rs/toolchain@v1
4050
with:

.github/workflows/deploy.yml

+16-19
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,28 @@
11
name: Deploy
22
on:
33
release:
4-
types: [released]
4+
types:
5+
- released
56
jobs:
67
deploy:
78
runs-on: ${{ matrix.os }}
89
strategy:
910
fail-fast: false
1011
matrix:
11-
python-version: ['3.7', '3.8', '3.9', '3.10']
12-
os: [ubuntu-latest, macos-latest, windows-latest]
12+
python-version:
13+
- '3.7'
14+
- '3.8'
15+
- '3.9'
16+
- '3.10'
17+
os:
18+
- ubuntu-latest
19+
- macos-latest
20+
- windows-latest
1321
steps:
1422
- name: Checkout
15-
uses: actions/checkout@v2
23+
uses: actions/checkout@v3
1624
- name: Set up Python ${{ matrix.python-version }}
17-
uses: actions/setup-python@v2
25+
uses: actions/setup-python@v3
1826
with:
1927
python-version: ${{ matrix.python-version }}
2028
- name: Install Rust
@@ -23,21 +31,10 @@ jobs:
2331
profile: minimal
2432
toolchain: stable
2533
override: true
26-
- uses: messense/maturin-action@v1
27-
if: runner.os != 'Windows'
34+
- name: Publish Package
35+
uses: messense/maturin-action@v1
2836
with:
29-
maturin-version: latest
3037
command: publish
31-
manylinux: 2_24
32-
args: --username __token__ --no-sdist --interpreter python${{ matrix.python-version }}
33-
env:
34-
MATURIN_PASSWORD: ${{ secrets.pypi_password }}
35-
- uses: messense/maturin-action@v1
36-
if: runner.os == 'Windows'
37-
with:
38-
maturin-version: latest
39-
command: publish
40-
manylinux: 2_24
41-
args: --username __token__ --no-sdist --interpreter python
38+
args: --username=__token__ --no-sdist --interpreter=python${{ !startsWith(matrix.os, 'windows') && matrix.python-version || '' }}
4239
env:
4340
MATURIN_PASSWORD: ${{ secrets.pypi_password }}

Cargo.toml

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "pysubstringsearch"
3-
version = "0.5.0"
3+
version = "0.6.0"
44
authors = ["Gal Ben David <[email protected]>"]
55
edition = "2021"
66
description = "A Python library written in Rust that searches for substrings quickly using a Suffix Array"
@@ -19,7 +19,7 @@ keywords = [
1919
]
2020

2121
[package.metadata.maturin]
22-
requires-python = ">=3.6"
22+
requires-python = ">=3.7"
2323
classifier = [
2424
"License :: OSI Approved :: MIT License",
2525
"Operating System :: MacOS",
@@ -41,11 +41,11 @@ ahash = "0.7"
4141
bstr = "0.2"
4242
byteorder = "1"
4343
memchr = "2"
44-
parking_lot = "0.11"
44+
parking_lot = "0.12"
4545
rayon = "1"
4646

4747
[dependencies.pyo3]
48-
version = "0.15.1"
48+
version = "0.16.4"
4949
features = ["extension-module"]
5050

5151
[build-dependencies]

LICENSE

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2021 Gal Ben David
3+
Copyright (c) 2022 Gal Ben David
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

+10
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ PySubstringSearch is a library designed to search over an index file for substri
3232

3333
The module implements two methods for searching.
3434
- `search` - Find different entries with the same substring concurrently. Concurrency increases as the index file grows in size with multiple inner chunks.
35+
- `search_multiple` - Same as `search`, but accepts multiple substrings in a single call.
3536

3637

3738
### Built With
@@ -105,6 +106,15 @@ reader.search('short')
105106
# look up a substring
106107
reader.search('string')
107108
>>> ['some short string', 'another but now a longer string']
109+
110+
# look up multiple substrings
111+
reader.search_multiple(
112+
[
113+
'short',
114+
'longer',
115+
],
116+
)
117+
>>> ['some short string', 'another but now a longer string']
108118
```
109119

110120

pyproject.toml

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[build-system]
2-
requires = ["maturin>=0.11,<0.12"]
2+
requires = ["maturin>=0.12,<0.13"]
33
build-backend = "maturin"
44

55
[tool.maturin]
@@ -12,7 +12,7 @@ sdist-include = [
1212

1313
[tool.poetry]
1414
name = "pysubstringsearch"
15-
version = "0.5.0"
15+
version = "0.6.0"
1616
authors = ["Gal Ben David <[email protected]>"]
1717
description = "A Python library written in Rust that searches for substrings quickly using a Suffix Array"
1818
readme = "README.md"
@@ -41,7 +41,7 @@ classifiers = [
4141
]
4242

4343
[tool.poetry.dependencies]
44-
python = "^3.6"
44+
python = "^3.7"
4545

4646
[tool.poetry.dev-dependencies]
4747
pytest = "*"

pysubstringsearch/__init__.py

+70-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,73 @@
1+
import typing
2+
13
from . import pysubstringsearch
24

35

4-
Writer = pysubstringsearch.Writer
5-
Reader = pysubstringsearch.Reader
6+
class Writer:
7+
def __init__(
8+
self,
9+
index_file_path: str,
10+
max_chunk_len: typing.Optional[int] = None,
11+
) -> None:
12+
self.writer = pysubstringsearch.Writer(
13+
index_file_path=index_file_path,
14+
max_chunk_len=max_chunk_len,
15+
)
16+
17+
def add_entries_from_file_lines(
18+
self,
19+
input_file_path: str,
20+
) -> None:
21+
self.writer.add_entries_from_file_lines(
22+
input_file_path=input_file_path,
23+
)
24+
25+
def add_entry(
26+
self,
27+
text: str,
28+
) -> None:
29+
self.writer.add_entry(
30+
text=text,
31+
)
32+
33+
def dump_data(
34+
self,
35+
) -> None:
36+
self.writer.dump_data()
37+
38+
def finalize(
39+
self,
40+
) -> None:
41+
self.writer.finalize()
42+
43+
44+
class Reader:
45+
def __init__(
46+
self,
47+
index_file_path: str,
48+
) -> None:
49+
self.reader = pysubstringsearch.Reader(
50+
index_file_path=index_file_path,
51+
)
52+
53+
def search(
54+
self,
55+
substring: str,
56+
) -> typing.List[str]:
57+
return self.reader.search(
58+
substring=substring,
59+
)
60+
61+
def search_multiple(
62+
self,
63+
substrings: typing.List[str],
64+
) -> typing.List[str]:
65+
results = []
66+
for substring in substrings:
67+
results.extend(
68+
self.search(
69+
substring=substring,
70+
),
71+
)
72+
73+
return results

pysubstringsearch/py.typed

Whitespace-only changes.

pysubstringsearch/pysubstringsearch.pyi

+6-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ class Writer:
55
def __init__(
66
self,
77
index_file_path: str,
8-
max_chunk_len: typing.Optional[int],
8+
max_chunk_len: typing.Optional[int] = None,
99
) -> None: ...
1010

1111
def add_entries_from_file_lines(
@@ -37,3 +37,8 @@ class Reader:
3737
self,
3838
substring: str,
3939
) -> typing.List[str]: ...
40+
41+
def search_multiple(
42+
self,
43+
substrings: typing.List[str],
44+
) -> typing.List[str]: ...

src/lib.rs

+6-9
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,19 @@ extern "C" {
2424
fn construct_suffix_array(
2525
buffer: &[u8],
2626
) -> Vec<i32> {
27-
unsafe {
28-
let mut suffix_array: Vec<i32> = Vec::with_capacity(buffer.len());
29-
suffix_array.set_len(buffer.len());
27+
let mut suffix_array = vec![0; buffer.len()];
3028

29+
unsafe {
3130
libsais(
3231
buffer.as_ptr(),
3332
suffix_array.as_mut_ptr(),
3433
buffer.len() as i32,
3534
0,
3635
std::ptr::null_mut::<i32>(),
3736
);
38-
39-
suffix_array
4037
}
38+
39+
suffix_array
4140
}
4241

4342
#[pyclass]
@@ -174,8 +173,7 @@ impl Reader {
174173

175174
while bytes_read < index_file_len {
176175
let data_file_len = index_file.read_u32::<LittleEndian>()?;
177-
let mut data = Vec::with_capacity(data_file_len as usize);
178-
unsafe { data.set_len(data_file_len as usize) };
176+
let mut data = vec![0; data_file_len as usize];
179177
index_file.read_exact(&mut data)?;
180178

181179
let suffixes_file_len = index_file.read_u32::<LittleEndian>()? as usize;
@@ -256,8 +254,7 @@ impl Reader {
256254
let start_of_indices = start_of_indices.unwrap();
257255
let end_of_indices = end_of_indices.unwrap();
258256

259-
let mut suffixes = Vec::with_capacity(end_of_indices - start_of_indices + 4);
260-
unsafe { suffixes.set_len(end_of_indices - start_of_indices + 4) };
257+
let mut suffixes = vec![0; end_of_indices - start_of_indices + 4];
261258

262259
sub_index.index_file.seek(SeekFrom::Start(start_of_indices as u64)).unwrap();
263260
sub_index.index_file.read_exact(&mut suffixes).unwrap();

0 commit comments

Comments
 (0)