Add benchmarking test suite and greatly improve performance in a few cases (#948)

* Add benchmarking test suite

* Improve amortized time of model relation loads with a large number of rows

* Improve performance of loading models with many related models

* Improve performance of loading models with many related models to roughly O(N)

* Fix bug where creating N models with a shared related model would take O(N^2) time

* Lower blocking time for queryset results

* Add docstrings and streamline hash code

Co-authored-by: haydeec1 <Eric.Haydel@jhuapl.edu>
Author: erichaydel
Date: 2022-12-10 11:12:11 -05:00 (committed via GitHub)
Parent: 171ef2ffaa
Commit: 7c18fa55e7
25 changed files with 1250 additions and 230 deletions
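The new suite lives under benchmarks/ and builds on the pytest-benchmark plugin's `benchmark` fixture (requested in benchmarks/conftest.py below). As a rough sketch, and assuming pytest and pytest-benchmark are installed and tests/settings.py points DATABASE_URL at a disposable database, the suite can be invoked programmatically like this (hypothetical helper script, not part of the commit):

import sys

import pytest

if __name__ == "__main__":
    # Run only the benchmarks package; "-q" keeps the output compact.
    # pytest-benchmark prints its timing table at the end of the session.
    sys.exit(pytest.main(["benchmarks", "-q"]))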

benchmarks/__init__.py (new, empty file)

benchmarks/conftest.py (new file)

@ -0,0 +1,117 @@
import asyncio
import random
import string
import time

import databases
import nest_asyncio
import pytest
import pytest_asyncio
import sqlalchemy

import ormar
from tests.settings import DATABASE_URL

nest_asyncio.apply()

database = databases.Database(DATABASE_URL)
metadata = sqlalchemy.MetaData()

pytestmark = pytest.mark.asyncio


class BaseMeta(ormar.ModelMeta):
    metadata = metadata
    database = database


class Author(ormar.Model):
    class Meta(BaseMeta):
        tablename = "authors"

    id: int = ormar.Integer(primary_key=True)
    name: str = ormar.String(max_length=100)
    score: float = ormar.Integer(minimum=0, maximum=100)


class AuthorWithManyFields(Author):
    year_born: int = ormar.Integer()
    year_died: int = ormar.Integer(nullable=True)
    birthplace: str = ormar.String(max_length=255)


class Publisher(ormar.Model):
    class Meta(BaseMeta):
        tablename = "publishers"

    id: int = ormar.Integer(primary_key=True)
    name: str = ormar.String(max_length=100)
    prestige: int = ormar.Integer(minimum=0, maximum=10)


class Book(ormar.Model):
    class Meta(BaseMeta):
        tablename = "books"

    id: int = ormar.Integer(primary_key=True)
    author: Author = ormar.ForeignKey(Author, index=True)
    publisher: Publisher = ormar.ForeignKey(Publisher, index=True)
    title: str = ormar.String(max_length=100)
    year: int = ormar.Integer(nullable=True)


@pytest.fixture(autouse=True, scope="function")  # TODO: fix this to be module
def create_test_database():
    engine = sqlalchemy.create_engine(DATABASE_URL)
    metadata.drop_all(engine)
    metadata.create_all(engine)
    yield
    metadata.drop_all(engine)


@pytest_asyncio.fixture
async def author():
    author = await Author(name="Author", score=10).save()
    return author


@pytest_asyncio.fixture
async def publisher():
    publisher = await Publisher(name="Publisher", prestige=random.randint(0, 10)).save()
    return publisher


@pytest_asyncio.fixture
async def authors_in_db(num_models: int):
    authors = [
        Author(
            name="".join(random.sample(string.ascii_letters, 5)),
            score=random.random() * 100,
        )
        for i in range(0, num_models)
    ]
    await Author.objects.bulk_create(authors)
    return await Author.objects.all()
@pytest_asyncio.fixture
@pytest.mark.benchmark(
    min_rounds=1, timer=time.process_time, disable_gc=True, warmup=False
)
async def aio_benchmark(benchmark, event_loop: asyncio.BaseEventLoop):
    """Adapt pytest-benchmark's synchronous `benchmark` fixture to coroutines."""

    def _fixture_wrapper(func):
        def _func_wrapper(*args, **kwargs):
            if asyncio.iscoroutinefunction(func):
                # Time the coroutine by running it to completion on the test's
                # event loop; nest_asyncio.apply() above makes this re-entrant.
                @benchmark
                def benchmarked_func():
                    a = event_loop.run_until_complete(func(*args, **kwargs))
                    return a

                return benchmarked_func
            else:
                # Plain callables are handed to pytest-benchmark directly.
                return benchmark(func, *args, **kwargs)

        return _func_wrapper

    return _fixture_wrapper
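The fixture above is how every test in the suite drives an async operation through pytest-benchmark: the test decorates an inner coroutine with `aio_benchmark` and then calls it like a plain function, getting back the already-awaited result. A minimal illustrative test in the same style (hypothetical, not one of the committed files) would be:

import pytest

from benchmarks.conftest import Author

pytestmark = pytest.mark.asyncio


@pytest.mark.parametrize("num_models", [100])
async def test_fetch_all_example(aio_benchmark, num_models, authors_in_db):
    # aio_benchmark wraps the coroutine so each benchmark round runs it to
    # completion on the test's event loop via run_until_complete().
    @aio_benchmark
    async def fetch_all():
        return await Author.objects.all()

    authors = fetch_all()  # already awaited; returns the query result
    assert len(authors) == num_models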


@ -0,0 +1,57 @@
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_count(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def count():
return await Author.objects.count()
c = count()
assert c == len(authors_in_db)
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_avg(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def avg():
return await Author.objects.avg("score")
average = avg()
assert 0 <= average <= 100
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_sum(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def sum_():
return await Author.objects.sum("score")
s = sum_()
assert 0 <= s <= 100 * num_models
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_min(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def min_():
return await Author.objects.min("score")
m = min_()
assert 0 <= m <= 100
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_max(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def max_():
return await Author.objects.max("score")
m = max_()
assert 0 <= m <= 100


@ -0,0 +1,26 @@
import random
import string
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_making_and_inserting_models_in_bulk(aio_benchmark, num_models: int):
@aio_benchmark
async def make_and_insert(num_models: int):
authors = [
Author(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
for i in range(0, num_models)
]
assert len(authors) == num_models
await Author.objects.bulk_create(authors)
make_and_insert(num_models)


@ -0,0 +1,27 @@
import random
import string
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_updating_models_in_bulk(
aio_benchmark, num_models: int, authors_in_db: List[Author]
):
starting_first_name = authors_in_db[0].name
@aio_benchmark
async def update(authors: List[Author]):
await Author.objects.bulk_update(authors)
for author in authors_in_db:
author.name = "".join(random.sample(string.ascii_letters, 5))
update(authors_in_db)
author = await Author.objects.get(id=authors_in_db[0].id)
assert author.name != starting_first_name


@ -0,0 +1,91 @@
import random
import string
import pytest
from benchmarks.conftest import Author, Book, Publisher
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_creating_models_individually(aio_benchmark, num_models: int):
@aio_benchmark
async def create(num_models: int):
authors = []
for idx in range(0, num_models):
author = await Author.objects.create(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
authors.append(author)
return authors
authors = create(num_models)
for author in authors:
assert author.id is not None
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_creating_individually_with_related_models(
aio_benchmark, num_models: int, author: Author, publisher: Publisher
):
@aio_benchmark
async def create_with_related_models(
author: Author, publisher: Publisher, num_models: int
):
books = []
for idx in range(0, num_models):
book = await Book.objects.create(
author=author,
publisher=publisher,
title="".join(random.sample(string.ascii_letters, 5)),
year=random.randint(0, 2000),
)
books.append(book)
return books
books = create_with_related_models(
author=author, publisher=publisher, num_models=num_models
)
for book in books:
assert book.id is not None
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_get_or_create_when_create(aio_benchmark, num_models: int):
@aio_benchmark
async def get_or_create(num_models: int):
authors = []
for idx in range(0, num_models):
author, created = await Author.objects.get_or_create(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
assert created
authors.append(author)
return authors
authors = get_or_create(num_models)
for author in authors:
assert author.id is not None
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_update_or_create_when_create(aio_benchmark, num_models: int):
@aio_benchmark
async def update_or_create(num_models: int):
authors = []
for idx in range(0, num_models):
author = await Author.objects.update_or_create(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
authors.append(author)
return authors
authors = update_or_create(num_models)
for author in authors:
assert author.id is not None


@ -0,0 +1,36 @@
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_deleting_all(
aio_benchmark, num_models: int, authors_in_db: List[Author]
):
@aio_benchmark
async def delete_all():
await Author.objects.delete(each=True)
delete_all()
num = await Author.objects.count()
assert num == 0
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_deleting_individually(
aio_benchmark, num_models: int, authors_in_db: List[Author]
):
@aio_benchmark
async def delete_one_by_one(authors: List[Author]):
for author in authors:
await Author.objects.filter(id=author.id).delete()
delete_one_by_one(authors_in_db)
num = await Author.objects.count()
assert num == 0


@ -0,0 +1,102 @@
import random
import string
from typing import List
import pytest
import pytest_asyncio
from benchmarks.conftest import Author, Book, Publisher
pytestmark = pytest.mark.asyncio
@pytest_asyncio.fixture()
async def books(author: Author, publisher: Publisher, num_models: int):
books = [
Book(
author=author,
publisher=publisher,
title="".join(random.sample(string.ascii_letters, 5)),
year=random.randint(0, 2000),
)
for _ in range(0, num_models)
]
await Book.objects.bulk_create(books)
return books
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_get_all(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def get_all(authors: List[Author]):
return await Author.objects.all()
authors = get_all(authors_in_db)
for idx, author in enumerate(authors_in_db):
assert authors[idx].id == author.id
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_get_all_with_related_models(
aio_benchmark, num_models: int, author: Author, books: List[Book]
):
@aio_benchmark
async def get_with_related(author: Author):
return await Author.objects.select_related("books").all(id=author.id)
authors = get_with_related(author)
assert len(authors[0].books) == num_models
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_get_one(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def get_one(authors: List[Author]):
return await Author.objects.get(id=authors[0].id)
author = get_one(authors_in_db)
assert author == authors_in_db[0]
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_get_or_none(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def get_or_none(authors: List[Author]):
return await Author.objects.get_or_none(id=authors[0].id)
author = get_or_none(authors_in_db)
assert author == authors_in_db[0]
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_get_or_create_when_get(
aio_benchmark, num_models: int, authors_in_db: List[Author]
):
@aio_benchmark
async def get_or_create(authors: List[Author]):
author, created = await Author.objects.get_or_create(id=authors[0].id)
assert not created
return author
author = get_or_create(authors_in_db)
assert author == authors_in_db[0]
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_first(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def first():
return await Author.objects.first()
author = first()
assert author == authors_in_db[0]
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_exists(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def check_exists(authors: List[Author]):
return await Author.objects.filter(id=authors[0].id).exists()
exists = check_exists(authors_in_db)
assert exists


@ -0,0 +1,48 @@
import random
import string
import pytest
from benchmarks.conftest import Author, Book, Publisher
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_initializing_models(aio_benchmark, num_models: int):
@aio_benchmark
async def initialize_models(num_models: int):
authors = [
Author(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
for i in range(0, num_models)
]
assert len(authors) == num_models
initialize_models(num_models)
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_initializing_models_with_related_models(aio_benchmark, num_models: int):
@aio_benchmark
async def initialize_models_with_related_models(
author: Author, publisher: Publisher, num_models: int
):
books = [
Book(
author=author,
publisher=publisher,
title="".join(random.sample(string.ascii_letters, 5)),
year=random.randint(0, 2000),
)
for i in range(0, num_models)
]
author = await Author(name="Author", score=10).save()
publisher = await Publisher(name="Publisher", prestige=random.randint(0, 10)).save()
ids = initialize_models_with_related_models(
author=author, publisher=publisher, num_models=num_models
)


@ -0,0 +1,21 @@
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_iterate(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def iterate_over_all(authors: List[Author]):
authors = []
async for author in Author.objects.iterate():
authors.append(author)
return authors
authors = iterate_over_all(authors_in_db)
for idx, author in enumerate(authors_in_db):
assert authors[idx].id == author.id


@ -0,0 +1,65 @@
import random
import string
import pytest
from benchmarks.conftest import Author, Book, Publisher
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_saving_models_individually(aio_benchmark, num_models: int):
@aio_benchmark
async def make_and_insert(num_models: int):
authors = [
Author(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
for i in range(0, num_models)
]
assert len(authors) == num_models
ids = []
for author in authors:
a = await author.save()
ids.append(a)
return ids
ids = make_and_insert(num_models)
for id in ids:
assert id is not None
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_saving_models_individually_with_related_models(
aio_benchmark, num_models: int, author: Author, publisher: Publisher
):
@aio_benchmark
async def making_and_inserting_related_models_one_by_one(
author: Author, publisher: Publisher, num_models: int
):
books = [
Book(
author=author,
publisher=publisher,
title="".join(random.sample(string.ascii_letters, 5)),
year=random.randint(0, 2000),
)
for i in range(0, num_models)
]
ids = []
for book in books:
await book.save()
ids.append(book.id)
return ids
ids = making_and_inserting_related_models_one_by_one(
author=author, publisher=publisher, num_models=num_models
)
for id in ids:
assert id is not None


@ -0,0 +1,27 @@
import random
import string
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_updating_models_individually(
aio_benchmark, num_models: int, authors_in_db: List[Author]
):
starting_first_name = authors_in_db[0].name
@aio_benchmark
async def update(authors: List[Author]):
for author in authors:
a = await author.update(
name="".join(random.sample(string.ascii_letters, 5))
)
update(authors_in_db)
author = await Author.objects.get(id=authors_in_db[0].id)
assert author.name != starting_first_name


@ -0,0 +1,29 @@
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_values(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def get_all_values(authors: List[Author]):
return await Author.objects.values()
authors_list = get_all_values(authors_in_db)
for idx, author in enumerate(authors_in_db):
assert authors_list[idx]["id"] == author.id
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_values_list(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def get_all_values_list(authors: List[Author]):
return await Author.objects.values_list()
authors_list = get_all_values_list(authors_in_db)
for idx, author in enumerate(authors_in_db):
assert authors_list[idx][0] == author.id