From adbdfa120925bc22a354a3142b0c55c037316bc7 Mon Sep 17 00:00:00 2001 From: nocturn9x Date: Tue, 17 Nov 2020 10:06:35 +0100 Subject: [PATCH] README almost complete, improved examples --- README.md | 308 ++++++++++++++++++++++++++---------------- giambio/core.py | 18 ++- giambio/exceptions.py | 8 ++ tests/count.py | 12 +- 4 files changed, 222 insertions(+), 124 deletions(-) diff --git a/README.md b/README.md index 86acf8d..8d8baa3 100644 --- a/README.md +++ b/README.md @@ -27,20 +27,21 @@ One possible approach to achieve concurrency is to use threads, and despite thei actually might be a good choice when it comes to I/O for reasons that span far beyond the scope of this tutorial. If you choose to use threads, there are a couple things you can do, involving what is known as _thread synchronization primitives_ and _thread pools_, but once again that is beyond the purposes of this quickstart guide. -A library like giambio comes into play when you need to perform lots of [blocking operations](https://en.wikipedia.org/wiki/Blocking_(computing)) -and network servers, among other things, happen to be heavily based on I/O: a blocking operation. +A library like giambio comes into play when you need to perform lots of [blocking operations](https://en.wikipedia.org/wiki/Blocking_(computing)), +and network servers happen to be heavily based on I/O: a blocking operation. Starting to see where we're heading? ## A deeper dive -giambio has been designed with simplicity in mind, so this README won't explain all the gritty details about _how_ async is +Giambio has been designed with simplicity in mind, so this document won't explain all the gritty details about _how_ async is implemented in Python (you might want to check out [this article](https://snarky.ca/how-the-heck-does-async-await-work-in-python-3-5/) if you want to learn more about all the implementation details). 
For the sake of this tutorial, all you need to know is that giambio is all about a feature added in Python 3.5: asynchronous functions, or 'async' for short. -Async functions are functions defined with `async def` instead of the regular `def`, like so: -```python +Async functions are functions defined with `async def` instead of the regular `def`, like so: + +```python async def async_fun(): # An async function print("Hello, world!") @@ -61,39 +62,39 @@ async def async_one(): ``` It has to be noted that using `await` outside of an async function is a `SyntaxError`, so basically async -functions have a unique superpower: they can call other async functions. This already presents a chicken-and-egg -problem, because when you fire up Python it is running plain ol' synchronous code, so how do we enter the async -context in the first place? That is done via a special _synchronous function_, `giambio.run` in our case, -that has the ability to call asynchronous functions and can therefore initiate the async context. For this -reason, `giambio.run` **must** be called from a synchronous context, to avoid a horrible _deadlock_. Now -that you know all of this, you might be wondering why on earth would one use async functions instead of +functions have a unique superpower: they, and no-one else, can call other async functions. + +This already presents a chicken-and-egg problem, because when you fire up Python, it is running plain ol' +synchronous code; So how do we enter the async context in the first place? + +That is done via a special _synchronous function_, `giambio.run` in our case, that has the ability to call +asynchronous functions and can therefore initiate the async context. For this +reason, `giambio.run` **must** be called from a synchronous context, to avoid a horrible _deadlock_. 
+ +Now that you know all of this, you might be wondering why on earth would one use async functions instead of regular functions: after all, their ability to call other async functions seems pretty pointless in itself, doesn't it? Take a look at this example below: ```python - import giambio - async def foo(): print("Hello, world!") - giambio.run(foo) # Prints 'Hello, world!' ``` This could as well be written the following way and would produce the same output: ```python - def foo(): print("Hello, world!") - foo() # Prints 'Hello, world!' ``` To answer this question, we have to dig a bit deeper about _what_ giambio gives you in exchange for all this `async`/`await` madness. + We already introduced `giambio.run`, a special runner function that can start the async context from a synchronous one, but giambio provides also a set of tools, mainly for doing I/O. These functions, as you might have guessed, are async functions and they're useful! So if you wanna take advantage of giambio, and hopefully you will after reading this guide, you need to write async code. @@ -104,15 +105,12 @@ so far. Don't worry, that is intentional: giambio never lets a user deal with co model is much simpler if we take coroutines out of the game, and everything works just the same. ```python - import giambio - async def sleep_double(n): await giambio.sleep(2 * n) - -giambio.run(sleep_double, 2) # This hangs for 4 seconds and then returns +giambio.run(sleep_double, 2) # This hangs for 4 seconds and returns ``` As it turns out, this function is one that's actually worth making async: because it calls another async function. @@ -121,14 +119,14 @@ make it async in the first place. ### Don't forget the `await`! -As we already learned, async functions can only be called with the `await` keyword, so you would think that not -doing so would raise an error, but it's actually a little bit trickier than that. 
Take this example here +As we already learned, async functions can only be called with the `await` keyword, and it would be logical to +think that forgetting to do so would raise an error, but it's actually a little bit trickier than that. + +Take this example here: ```python - import giambio - async def sleep_double_broken(n): print("Taking a nap!") start = giambio.clock() @@ -136,7 +134,6 @@ async def sleep_double_broken(n): end = giambio.clock() - start print(f"Slept for {end:.2f} seconds!") - giambio.run(sleep_double_broken, 2) ``` @@ -151,12 +148,13 @@ __main__:7: RuntimeWarning: coroutine 'sleep' was never awaited Wait, what happened here? From this output, it looks like the code worked, but something clearly went wrong: the function didn't sleep. Python gives us a hint that we broke _something_ by raising a warning, complaining that `coroutine 'sleep' was never awaited` (you might not see this warning because it depends on whether a -garbage collection cycle occurred or not). I know I said we weren't going to talk about coroutines, but you have -to blame Python, not me. Just know that if you see a warning like that, it means that somewhere in your code -you forgot an `await` when calling an async function, so try fixing that before trying to figure out what could -be the problem if you have a long traceback: most likely that's just collateral damage caused by the missing keyword. +garbage collection cycle occurred or not). +I know I said we weren't going to talk about coroutines, but you have to blame Python, not me. Just know that +if you see a warning like that, it means that somewhere in your code you forgot an `await` when calling an async +function, so try fixing that before trying to figure out what could be the problem if you have a long traceback: +most likely that's just collateral damage caused by the missing keyword. 
-If you're ok with just remembering to put `await` every time you call an async function you can safely skip to +If you're ok with just remembering to put `await` every time you call an async function, you can safely skip to the next section, but for the curios among y'all I might as well explain exactly what happened there. When coroutines are called without the `await`, they don't exactly do nothing: they return this weird 'coroutine' @@ -184,118 +182,196 @@ Our previous examples could be written using sync functions (like `time.sleep`) quite useful is it? But here comes the reason why you would want to use a library like giambio: it can run multiple async functions __at the same time__. -Yep, you read that right. To demonstrate this, have a look a this example +Yep, you read that right. + +To demonstrate this, have a look at this example: ```python - import giambio +async def countdown(n: int): + print(f"Counting down from {n}!") + while n > 0: + print(f"Down {n}") + n -= 1 + await giambio.sleep(1) + print("Countdown over") + return 0 -async def child(sleep: int, ident: int): - start = giambio.clock() # This returns the current time from giambio's perspective - print(f"[child {ident}] Gonna sleep for {sleep} seconds!") - await giambio.sleep(sleep) - end = giambio.clock() - start - print(f"[child {ident}] I woke up! Slept for {end} seconds") - +async def countup(stop: int): + print(f"Counting up to {stop}!") + x = 0 + while x < stop: + print(f"Up {x}") + x += 1 + await giambio.sleep(2) + print("Countup over") + return 1 async def main(): - print("[parent] Spawning children") - task = giambio.spawn(child, 1, 1) # We spawn a child task - task2 = giambio.spawn(child, 2, 2) # and why not? another one! 
start = giambio.clock() - print("[parent] Children spawned, awaiting completion") - await task.join() - await task2.join() - end = giambio.clock() - start - print(f"[parent] Execution terminated in {end} seconds") - + async with giambio.create_pool() as pool: + pool.spawn(countdown, 10) + pool.spawn(countup, 5) + print("Children spawned, awaiting completion") + print(f"Task execution complete in {giambio.clock() - start:.2f} seconds") if __name__ == "__main__": - giambio.run(main) # Start the async context -``` + giambio.run(main) -If you run that code, your output should look something like this (the order of the lines might be swapped): - -``` -[parent] Spawning children -[parent] Children spawned, awaiting completion -[child 1] Gonna sleep for 1 seconds! -[child 2] Gonna sleep for 2 seconds! -[...1 second passes...] -[child 1] I woke up! Slept for 1.004422144 seconds -[...another second passes...] -[child 2] I woke up! Slept for 2.0039494860000002 seconds -[parent] Execution terminated in 2.004069701 seconds ``` There is a lot going on here, and we'll explain every bit of it step by step: -TODO +- First, we imported giambio and defined two async functions: `countup` and `countdown` +- These two functions do exactly what their name suggests, but for the purposes of +this tutorial, `countup` will be running twice as slow as `countdown` (see the call +to `await giambio.sleep(2)`?) +- Here comes the real fun: `async with`? What's going on there? +As it turns out, Python 3.5 didn't just add async functions, but also quite a bit +of related new syntax. One of the things that was added is asynchronous context managers. +You might have already encountered context managers in python, but in case you didn't, +a line such as `with foo as sth` tells the Python interpreter to call `foo.__enter__()` +at the beginning of the block, and `foo.__exit__()` at the end of the block. The `as` +keyword just assigns the return value of `foo.__enter__()` to the variable `sth`. 
So +context managers are a shorthand for calling functions, and since Python 3.5 added +async functions, we also needed async context managers. While `with foo as sth` calls +`foo.__enter__()`, `async with foo as sth` calls `await foo.__aenter__()`, easy huh? -## Doing I/O +__Note__: On a related note, Python 3.5 also added asynchronous for loops! The logic is +the same though: while `for item in container` calls `container.__next__()` to fetch the +next item, `async for item in container` calls `await container.__anext__()` to do so. +It's _that_ simple, mostly just remember to stick `await` everywhere and you'll be good. -TODO +- Ok, so now we grasp `async with`, but what's with that `create_pool()`? In giambio, +there are actually 2 ways to call async functions: one we've already seen (`await fn()`), +while the other is through an asynchronous pool. The cool part about `pool.spawn()` is +that it will return immediately, without waiting for the async function to finish. So, +now our functions are running in the background. +After we spawn our tasks, we hit the call to `print` and the end of the block, so Python +calls the pool's `__aexit__()` method. What this does is pause the parent task (our `main` +async function in this case) until all child tasks have exited, and as it turns out, that +is a good thing. +The reason why pools always wait for all children to have finished executing is that it makes +it easier to propagate exceptions in the parent if something goes wrong: unlike many other frameworks, +exceptions in giambio always behave as expected. + + +Ok, so, let's try running this snippet and see what we get: + +``` +Children spawned, awaiting completion +Counting down from 10! +Down 10 +Counting up to 5! 
+Up 0 +Down 9 +Up 1 +Down 8 +Down 7 +Up 2 +Down 6 +Down 5 +Up 3 +Down 4 +Down 3 +Up 4 +Down 2 +Down 1 +Countup over +Countdown over +Task execution complete in 10.07 seconds +``` + +(Your output might have some lines swapped compared to this) + +You see how `countup` and `countdown` both start and finish +together? Moreover, even though each function slept for about 10 +seconds (therefore 20 seconds total), the program just took 10 +seconds to complete, so our children are really running at the same time. + +If you've ever done thread programming, this will feel like home, and that's good: +that's exactly what we want. But beware! No threads are involved here, giambio is +running in a single thread. That's why we talked about _tasks_ rather than _threads_ +so far. The difference between the two is that you can run a lot of tasks in a single +thread, and that with threads Python can switch which thread is running at any time. +Giambio, on the other hand, can switch tasks only at certain fixed points called +_checkpoints_, more on that later. + +### A sneak peek into the async world + +The basic idea behind libraries like giambio is that they can run a lot of tasks +at the same time by switching back and forth between them at appropriate places. +An example for that could be a web server: while the server is waiting for a response +from a client, we can accept another connection. You don't necessarily need all these +pesky details to use giambio, but it's good to have at least a high-level understanding +of how this all works. + +The peculiarity of asynchronous functions is that they can suspend their execution: that's +what `await` does, it yields back the execution control to giambio, which can then decide +what to do next. 
+ +To understand this better, take a look at this code: ```python +def countdown(n: int) -> int: + while n: + yield n + n -= 1 -import giambio -from giambio.socket import AsyncSocket -import socket -import logging - - -logging.basicConfig( - level=20, - format="[%(levelname)s] %(asctime)s %(message)s", - datefmt="%d/%m/%Y %p" - ) - - -async def server(address: tuple): - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Create the socket object - sock.bind(address) # We bind to the address - sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - sock.listen(5) # We start listening for connections, this must be done *before* wrapping the socket - asock = giambio.wrap_socket(sock) # We make the socket an async socket - logging.info(f"Echo server serving asynchronously at {address}") - while True: - conn, addr = await asock.accept() # Here we use our wrapper - logging.info(f"{addr} connected") - giambio.spawn(echo_handler, conn, addr) # We spawn a child task and keep listening for other clients - - -async def echo_handler(sock: AsyncSocket, addr: tuple): - async with sock: # Handy trick that will automagically close the socket for us when we're done - await sock.send_all(b"Welcome to the server pal, feel free to send me something!\n") - while True: - await sock.send_all(b"-> ") - data = await sock.receive(1024) # We get some data from the client - if not data: - break - # Below there's some decoding/encoding mess that you can safely not care about - to_send_back = data - data = data.decode("utf-8").encode("unicode_escape") - logging.info(f"Got: '{data.decode('utf-8')}' from {addr}") - # And now we send back our reply! 
- await sock.send_all(b"Got: " + to_send_back) - logging.info(f"Echoed back '{data.decode('utf-8')}' to {addr}") - # When we exit the context manager, the client has disconnected - logging.info(f"Connection from {addr} closed") - - -if __name__ == "__main__": - try: - giambio.run(server, ("", 1501)) - except BaseException as error: # Exceptions propagate! - print(f"Exiting due to a {type(error).__name__}: '{error}'") +for x in countdown(5): + print(x) ``` +In the above snippet, `countdown` is a generator function. Generators are really useful because +they allow you to customize iteration. Running that code produces the following output: + +``` +5 +4 +3 +2 +1 +``` + +The trick for this to work is `yield`. +What `yield` does is return back to the caller and suspend itself: in our case, `yield` +returns to the for loop, which calls `countdown` again. So, the generator resumes right +after the `yield`, decrements n, and loops right back to the top for the while loop to +execute again. It's that suspension part that allows the async magic to happen: the whole +`async`/`await` logic overlaps a lot with generator functions. + +Some libraries, like `asyncio`, take advantage of this yielding mechanism, because they were made +way before Python 3.5 added that nice new syntax. + +So, since only async functions can suspend themselves, the only places where giambio will switch +tasks is where there is a call to `await something()`. If there is no `await`, then you can be sure +that giambio will not switch tasks (because it can't): this makes the asynchronous model much easier +to reason about, because you can immediately statically infer whether a function will ever switch, and where +it will do so, unlike threads which can (and will) switch whenever they feel like it. + +Remember when we talked about checkpoints? That's what they are: async functions that allow giambio +to switch tasks. 
The problem with checkpoints is that if you don't have enough of them in your code, +then giambio will switch less frequently, hurting concurrency. It turns out that a quick and easy fix +for that is calling `await giambio.sleep(0)`; this will implicitly let giambio kick in and do its job, +and it will reschedule the caller almost immediately, because the sleep time is 0. + +### Mix and match? No thanks + +You may wonder whether you can mix async libraries: for instance, can we call `trio.sleep` in a +giambio application? The answer is no, we can't, and there's a reason for that. Giambio wraps all +your asynchronous code in its event loop, which is what actually runs the tasks. When you call +`await giambio.something()`, what you're doing is sending "commands" to the event loop asking it +to perform a certain thing in a given task, and to communicate your intent to the loop, the +primitives (such as `giambio.sleep`) talk a language that only giambio's event loop can understand. +Other libraries have other private "languages", so mixing them is not possible: doing so will cause +giambio to get very confused and most likely just explode spectacularly. + ## Contributing This is a relatively young project and it is looking for collaborators! 
It's not rocket science, but writing a proper framework like this implies some non-trivial issues that require proper and optimized solutions, -so if you feel like you want to challenge yourself don't hesitate to contact me on [Telegram](https://telegram.me/isgiambyy) +so if you feel like you want to challenge yourself don't hesitate to contact me on [Telegram](https://telegram.me/nocturn9x) or by [E-mail](mailto:hackhab@gmail.com) diff --git a/giambio/core.py b/giambio/core.py index d2deb1b..36933f6 100644 --- a/giambio/core.py +++ b/giambio/core.py @@ -27,7 +27,7 @@ from .traps import want_read, want_write from collections import deque from .socket import AsyncSocket, WantWrite, WantRead from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE -from .exceptions import ( +from .exceptions import (InternalError, CancelledError, ResourceBusy, ) @@ -129,6 +129,9 @@ class AsyncScheduler: self.to_send = None # Sneaky method call, thanks to David Beazley for this ;) getattr(self, method)(*args) + except AttributeError: # If this happens, that's quite bad! + raise InternalError("Uh oh! 
Something very bad just happened, did" + " you try to mix primitives from other async libraries?") from None except CancelledError: self.current_task.status = "cancelled" self.current_task.cancelled = True @@ -193,12 +196,19 @@ class AsyncScheduler: Checks and schedules task to perform I/O """ - # If there are no tasks ready wait indefinitely - timeout = 0.0 if self.tasks else None + if self.tasks or self.events: # If there are tasks or events, never wait + timeout = 0.0 + elif self.paused: # If there are asleep tasks, wait until the closest + # deadline + timeout = max(0.0, self.paused[0][0] - self.clock()) + else: + timeout = None # If we _only_ have I/O to do, then wait indefinitely for key in dict(self.selector.get_map()).values(): + # We make sure we don't reschedule finished tasks if key.data.finished: + key.data.last_io = () self.selector.unregister(key.fileobj) - if self.selector.get_map(): + if self.selector.get_map(): # If there is indeed tasks waiting on I/O io_ready = self.selector.select(timeout) # Get sockets that are ready and schedule their tasks for key, _ in io_ready: diff --git a/giambio/exceptions.py b/giambio/exceptions.py index 5e70eb6..bd531dc 100644 --- a/giambio/exceptions.py +++ b/giambio/exceptions.py @@ -25,6 +25,14 @@ class GiambioError(Exception): ... +class InternalError(GiambioError): + """ + Internal exception + """ + + ... 
+ + class CancelledError(BaseException): """ Exception raised by the giambio.objects.Task.cancel() method diff --git a/tests/count.py b/tests/count.py index c216e02..14e9f09 100644 --- a/tests/count.py +++ b/tests/count.py @@ -5,6 +5,7 @@ import giambio async def countdown(n: int): + print(f"Counting down from {n}!") while n > 0: print(f"Down {n}") n -= 1 @@ -15,23 +16,26 @@ async def countdown(n: int): async def countup(stop: int, step: int = 1): + print(f"Counting up to {stop}!") x = 0 while x < stop: print(f"Up {x}") - x += step + x += 1 await giambio.sleep(step) print("Countup over") return 1 async def main(): + start = giambio.clock() try: async with giambio.create_pool() as pool: - pool.spawn(countdown, 5) - pool.spawn(countup, 5, 1) + pool.spawn(countdown, 10) + pool.spawn(countup, 5, 2) + print("Children spawned, awaiting completion") except Exception as e: print(f"Got -> {type(e).__name__}: {e}") - print("Task execution complete") + print(f"Task execution complete in {giambio.clock() - start:.2f} seconds") if __name__ == "__main__":