Add benchmarking test suite and greatly improve performance in a few cases (#948)

* Add benchmarking test suite

* Improve amortized time of model relation loads with a large number of rows

* Improve performance of loading models with many related models

* Improve performance of loading models with many related models to roughly O(N)

* Fix bug where creating N models with a shared related model would take O(N^2) time

* Lower blocking time for queryset results

* Add docstrings and streamline hash code

Co-authored-by: haydeec1 <Eric.Haydel@jhuapl.edu>
Author: erichaydel
Date: 2022-12-10 11:12:11 -05:00 (committed via GitHub)
Parent: 171ef2ffaa
Commit: 7c18fa55e7
25 changed files with 1250 additions and 230 deletions
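The new suite lives under benchmarks/ and builds on the pytest-benchmark plugin's `benchmark` fixture (requested in benchmarks/conftest.py below). As a rough sketch, and assuming pytest and pytest-benchmark are installed and tests/settings.py points DATABASE_URL at a disposable database, the suite can be invoked programmatically like this (hypothetical helper script, not part of the commit):

import sys

import pytest

if __name__ == "__main__":
    # Run only the benchmarks package; "-q" keeps the output compact.
    # pytest-benchmark prints its timing table at the end of the session.
    sys.exit(pytest.main(["benchmarks", "-q"]))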

benchmarks/__init__.py (new, empty file)

benchmarks/conftest.py (new file)

@ -0,0 +1,117 @@
import asyncio
import random
import string
import time

import databases
import nest_asyncio
import pytest
import pytest_asyncio
import sqlalchemy

import ormar
from tests.settings import DATABASE_URL

nest_asyncio.apply()

database = databases.Database(DATABASE_URL)
metadata = sqlalchemy.MetaData()

pytestmark = pytest.mark.asyncio


class BaseMeta(ormar.ModelMeta):
    metadata = metadata
    database = database


class Author(ormar.Model):
    class Meta(BaseMeta):
        tablename = "authors"

    id: int = ormar.Integer(primary_key=True)
    name: str = ormar.String(max_length=100)
    score: float = ormar.Integer(minimum=0, maximum=100)


class AuthorWithManyFields(Author):
    year_born: int = ormar.Integer()
    year_died: int = ormar.Integer(nullable=True)
    birthplace: str = ormar.String(max_length=255)


class Publisher(ormar.Model):
    class Meta(BaseMeta):
        tablename = "publishers"

    id: int = ormar.Integer(primary_key=True)
    name: str = ormar.String(max_length=100)
    prestige: int = ormar.Integer(minimum=0, maximum=10)


class Book(ormar.Model):
    class Meta(BaseMeta):
        tablename = "books"

    id: int = ormar.Integer(primary_key=True)
    author: Author = ormar.ForeignKey(Author, index=True)
    publisher: Publisher = ormar.ForeignKey(Publisher, index=True)
    title: str = ormar.String(max_length=100)
    year: int = ormar.Integer(nullable=True)


@pytest.fixture(autouse=True, scope="function")  # TODO: fix this to be module
def create_test_database():
    engine = sqlalchemy.create_engine(DATABASE_URL)
    metadata.drop_all(engine)
    metadata.create_all(engine)
    yield
    metadata.drop_all(engine)


@pytest_asyncio.fixture
async def author():
    author = await Author(name="Author", score=10).save()
    return author


@pytest_asyncio.fixture
async def publisher():
    publisher = await Publisher(name="Publisher", prestige=random.randint(0, 10)).save()
    return publisher


@pytest_asyncio.fixture
async def authors_in_db(num_models: int):
    authors = [
        Author(
            name="".join(random.sample(string.ascii_letters, 5)),
            score=random.random() * 100,
        )
        for i in range(0, num_models)
    ]
    await Author.objects.bulk_create(authors)
    return await Author.objects.all()
@pytest_asyncio.fixture
@pytest.mark.benchmark(
    min_rounds=1, timer=time.process_time, disable_gc=True, warmup=False
)
async def aio_benchmark(benchmark, event_loop: asyncio.BaseEventLoop):
    """Adapt pytest-benchmark's synchronous `benchmark` fixture to coroutines."""

    def _fixture_wrapper(func):
        def _func_wrapper(*args, **kwargs):
            if asyncio.iscoroutinefunction(func):
                # Time the coroutine by running it to completion on the test's
                # event loop; nest_asyncio.apply() above makes this re-entrant.
                @benchmark
                def benchmarked_func():
                    a = event_loop.run_until_complete(func(*args, **kwargs))
                    return a

                return benchmarked_func
            else:
                # Plain callables are handed to pytest-benchmark directly.
                return benchmark(func, *args, **kwargs)

        return _func_wrapper

    return _fixture_wrapper
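The fixture above is how every test in the suite drives an async operation through pytest-benchmark: the test decorates an inner coroutine with `aio_benchmark` and then calls it like a plain function, getting back the already-awaited result. A minimal illustrative test in the same style (hypothetical, not one of the committed files) would be:

import pytest

from benchmarks.conftest import Author

pytestmark = pytest.mark.asyncio


@pytest.mark.parametrize("num_models", [100])
async def test_fetch_all_example(aio_benchmark, num_models, authors_in_db):
    # aio_benchmark wraps the coroutine so each benchmark round runs it to
    # completion on the test's event loop via run_until_complete().
    @aio_benchmark
    async def fetch_all():
        return await Author.objects.all()

    authors = fetch_all()  # already awaited; returns the query result
    assert len(authors) == num_models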


@ -0,0 +1,57 @@
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_count(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def count():
return await Author.objects.count()
c = count()
assert c == len(authors_in_db)
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_avg(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def avg():
return await Author.objects.avg("score")
average = avg()
assert 0 <= average <= 100
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_sum(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def sum_():
return await Author.objects.sum("score")
s = sum_()
assert 0 <= s <= 100 * num_models
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_min(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def min_():
return await Author.objects.min("score")
m = min_()
assert 0 <= m <= 100
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_max(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def max_():
return await Author.objects.max("score")
m = max_()
assert 0 <= m <= 100


@ -0,0 +1,26 @@
import random
import string
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_making_and_inserting_models_in_bulk(aio_benchmark, num_models: int):
@aio_benchmark
async def make_and_insert(num_models: int):
authors = [
Author(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
for i in range(0, num_models)
]
assert len(authors) == num_models
await Author.objects.bulk_create(authors)
make_and_insert(num_models)


@ -0,0 +1,27 @@
import random
import string
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_updating_models_in_bulk(
aio_benchmark, num_models: int, authors_in_db: List[Author]
):
starting_first_name = authors_in_db[0].name
@aio_benchmark
async def update(authors: List[Author]):
await Author.objects.bulk_update(authors)
for author in authors_in_db:
author.name = "".join(random.sample(string.ascii_letters, 5))
update(authors_in_db)
author = await Author.objects.get(id=authors_in_db[0].id)
assert author.name != starting_first_name


@ -0,0 +1,91 @@
import random
import string
import pytest
from benchmarks.conftest import Author, Book, Publisher
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_creating_models_individually(aio_benchmark, num_models: int):
@aio_benchmark
async def create(num_models: int):
authors = []
for idx in range(0, num_models):
author = await Author.objects.create(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
authors.append(author)
return authors
authors = create(num_models)
for author in authors:
assert author.id is not None
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_creating_individually_with_related_models(
aio_benchmark, num_models: int, author: Author, publisher: Publisher
):
@aio_benchmark
async def create_with_related_models(
author: Author, publisher: Publisher, num_models: int
):
books = []
for idx in range(0, num_models):
book = await Book.objects.create(
author=author,
publisher=publisher,
title="".join(random.sample(string.ascii_letters, 5)),
year=random.randint(0, 2000),
)
books.append(book)
return books
books = create_with_related_models(
author=author, publisher=publisher, num_models=num_models
)
for book in books:
assert book.id is not None
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_get_or_create_when_create(aio_benchmark, num_models: int):
@aio_benchmark
async def get_or_create(num_models: int):
authors = []
for idx in range(0, num_models):
author, created = await Author.objects.get_or_create(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
assert created
authors.append(author)
return authors
authors = get_or_create(num_models)
for author in authors:
assert author.id is not None
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_update_or_create_when_create(aio_benchmark, num_models: int):
@aio_benchmark
async def update_or_create(num_models: int):
authors = []
for idx in range(0, num_models):
author = await Author.objects.update_or_create(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
authors.append(author)
return authors
authors = update_or_create(num_models)
for author in authors:
assert author.id is not None


@ -0,0 +1,36 @@
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_deleting_all(
aio_benchmark, num_models: int, authors_in_db: List[Author]
):
@aio_benchmark
async def delete_all():
await Author.objects.delete(each=True)
delete_all()
num = await Author.objects.count()
assert num == 0
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_deleting_individually(
aio_benchmark, num_models: int, authors_in_db: List[Author]
):
@aio_benchmark
async def delete_one_by_one(authors: List[Author]):
for author in authors:
await Author.objects.filter(id=author.id).delete()
delete_one_by_one(authors_in_db)
num = await Author.objects.count()
assert num == 0


@ -0,0 +1,102 @@
import random
import string
from typing import List
import pytest
import pytest_asyncio
from benchmarks.conftest import Author, Book, Publisher
pytestmark = pytest.mark.asyncio
@pytest_asyncio.fixture()
async def books(author: Author, publisher: Publisher, num_models: int):
books = [
Book(
author=author,
publisher=publisher,
title="".join(random.sample(string.ascii_letters, 5)),
year=random.randint(0, 2000),
)
for _ in range(0, num_models)
]
await Book.objects.bulk_create(books)
return books
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_get_all(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def get_all(authors: List[Author]):
return await Author.objects.all()
authors = get_all(authors_in_db)
for idx, author in enumerate(authors_in_db):
assert authors[idx].id == author.id
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_get_all_with_related_models(
aio_benchmark, num_models: int, author: Author, books: List[Book]
):
@aio_benchmark
async def get_with_related(author: Author):
return await Author.objects.select_related("books").all(id=author.id)
authors = get_with_related(author)
assert len(authors[0].books) == num_models
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_get_one(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def get_one(authors: List[Author]):
return await Author.objects.get(id=authors[0].id)
author = get_one(authors_in_db)
assert author == authors_in_db[0]
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_get_or_none(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def get_or_none(authors: List[Author]):
return await Author.objects.get_or_none(id=authors[0].id)
author = get_or_none(authors_in_db)
assert author == authors_in_db[0]
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_get_or_create_when_get(
aio_benchmark, num_models: int, authors_in_db: List[Author]
):
@aio_benchmark
async def get_or_create(authors: List[Author]):
author, created = await Author.objects.get_or_create(id=authors[0].id)
assert not created
return author
author = get_or_create(authors_in_db)
assert author == authors_in_db[0]
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_first(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def first():
return await Author.objects.first()
author = first()
assert author == authors_in_db[0]
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_exists(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def check_exists(authors: List[Author]):
return await Author.objects.filter(id=authors[0].id).exists()
exists = check_exists(authors_in_db)
assert exists


@ -0,0 +1,48 @@
import random
import string
import pytest
from benchmarks.conftest import Author, Book, Publisher
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_initializing_models(aio_benchmark, num_models: int):
@aio_benchmark
async def initialize_models(num_models: int):
authors = [
Author(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
for i in range(0, num_models)
]
assert len(authors) == num_models
initialize_models(num_models)
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_initializing_models_with_related_models(aio_benchmark, num_models: int):
@aio_benchmark
async def initialize_models_with_related_models(
author: Author, publisher: Publisher, num_models: int
):
books = [
Book(
author=author,
publisher=publisher,
title="".join(random.sample(string.ascii_letters, 5)),
year=random.randint(0, 2000),
)
for i in range(0, num_models)
]
author = await Author(name="Author", score=10).save()
publisher = await Publisher(name="Publisher", prestige=random.randint(0, 10)).save()
ids = initialize_models_with_related_models(
author=author, publisher=publisher, num_models=num_models
)


@ -0,0 +1,21 @@
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_iterate(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def iterate_over_all(authors: List[Author]):
authors = []
async for author in Author.objects.iterate():
authors.append(author)
return authors
authors = iterate_over_all(authors_in_db)
for idx, author in enumerate(authors_in_db):
assert authors[idx].id == author.id


@ -0,0 +1,65 @@
import random
import string
import pytest
from benchmarks.conftest import Author, Book, Publisher
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_saving_models_individually(aio_benchmark, num_models: int):
@aio_benchmark
async def make_and_insert(num_models: int):
authors = [
Author(
name="".join(random.sample(string.ascii_letters, 5)),
score=random.random() * 100,
)
for i in range(0, num_models)
]
assert len(authors) == num_models
ids = []
for author in authors:
a = await author.save()
ids.append(a)
return ids
ids = make_and_insert(num_models)
for id in ids:
assert id is not None
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_saving_models_individually_with_related_models(
aio_benchmark, num_models: int, author: Author, publisher: Publisher
):
@aio_benchmark
async def making_and_inserting_related_models_one_by_one(
author: Author, publisher: Publisher, num_models: int
):
books = [
Book(
author=author,
publisher=publisher,
title="".join(random.sample(string.ascii_letters, 5)),
year=random.randint(0, 2000),
)
for i in range(0, num_models)
]
ids = []
for book in books:
await book.save()
ids.append(book.id)
return ids
ids = making_and_inserting_related_models_one_by_one(
author=author, publisher=publisher, num_models=num_models
)
for id in ids:
assert id is not None


@ -0,0 +1,27 @@
import random
import string
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [10, 20, 40])
async def test_updating_models_individually(
aio_benchmark, num_models: int, authors_in_db: List[Author]
):
starting_first_name = authors_in_db[0].name
@aio_benchmark
async def update(authors: List[Author]):
for author in authors:
a = await author.update(
name="".join(random.sample(string.ascii_letters, 5))
)
update(authors_in_db)
author = await Author.objects.get(id=authors_in_db[0].id)
assert author.name != starting_first_name


@ -0,0 +1,29 @@
from typing import List
import pytest
from benchmarks.conftest import Author
pytestmark = pytest.mark.asyncio
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_values(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def get_all_values(authors: List[Author]):
return await Author.objects.values()
authors_list = get_all_values(authors_in_db)
for idx, author in enumerate(authors_in_db):
assert authors_list[idx]["id"] == author.id
@pytest.mark.parametrize("num_models", [250, 500, 1000])
async def test_values_list(aio_benchmark, num_models: int, authors_in_db: List[Author]):
@aio_benchmark
async def get_all_values_list(authors: List[Author]):
return await Author.objects.values_list()
authors_list = get_all_values_list(authors_in_db)
for idx, author in enumerate(authors_in_db):
assert authors_list[idx][0] == author.id