ripgrep


whosonfirst-datasette/Dockerfile

1   FROM datasetteproject/datasette
2   RUN apt update && apt install -y wget bzip2
3   RUN wget -q https://latest.datasette.io/fixtures.db
4   # RUN datasette inspect --inspect-file=/mnt/whosonfirst/inspect.json /fixtures.db /mnt/whosonfirst/whosonfirst-data-latest.db
5   EXPOSE 8001
6   CMD ["datasette", "-h", "0.0.0.0", "-p", "8001", "/mnt/whosonfirst/whosonfirst-data-latest.db", "/fixtures.db", "--cors"]

yaml-to-sqlite/tests/test_cli.py

1   from click.testing import CliRunner
2   from yaml_to_sqlite import cli
3   import sqlite_utils
29  
30  
31  def test_without_pk(tmpdir):
32      db_path = tmpdir / "db.db"
33      assert (
44  
45  
46  def test_with_pk(tmpdir):
47      db_path = tmpdir / "db.db"
48      assert (
59  
60  
61  def test_single_column(tmpdir):
62      db_path = tmpdir / "db.db"
63      test_yaml = "- One\n" "- Two\n" "- Three\n"
64      assert (
65          0
68              cli.cli,
69              [str(db_path), "numbers", "-", "--single-column", "name"],
70              input=test_yaml,
71          )
72          .exit_code
78  
79  
80  def test_alters_if_necessary(tmpdir):
81      db_path = tmpdir / "db.db"
82      assert (
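The assertions in these tests are truncated in this excerpt. As a hedged reconstruction of the pattern they follow (Click's `CliRunner` fed YAML on stdin, then the resulting table checked with `sqlite-utils`), here is a self-contained sketch of the single-column case - the expected rows are an assumption based on the arguments shown above:

```python
from click.testing import CliRunner
import sqlite_utils
from yaml_to_sqlite import cli


def test_single_column_sketch(tmpdir):
    db_path = str(tmpdir / "db.db")
    result = CliRunner().invoke(
        cli.cli,
        # Write the YAML list from stdin into a "numbers" table with a single "name" column
        [db_path, "numbers", "-", "--single-column", "name"],
        input="- One\n- Two\n- Three\n",
    )
    assert result.exit_code == 0
    assert list(sqlite_utils.Database(db_path)["numbers"].rows) == [
        {"name": "One"},
        {"name": "Two"},
        {"name": "Three"},
    ]
```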

yaml-to-sqlite/setup.py

24      packages=find_packages(),
25      install_requires=["click", "PyYAML", "sqlite-utils>=3.9.1"],
26      setup_requires=["pytest-runner"],
27      extras_require={"test": ["pytest"]},
28      entry_points="""
29          [console_scripts]
30          yaml-to-sqlite=yaml_to_sqlite.cli:cli
31      """,
32      tests_require=["yaml-to-sqlite[test]"],
33      url="https://github.com/simonw/yaml-to-sqlite",
34      classifiers=[

vaccinate-ca-datasette/metadata.yml

5   about_url: https://github.com/simonw/vaccinate-ca-datasette
6   description_html: |-
7     <a href="/vaccinateca/locations?_facet=Affiliation&_facet=Latest+report+yes%3F&Latest+report+yes%3F=1&_facet_array=Availability+Info">Latest places that reported yes, on a map</a>

twitter-to-sqlite/tests/test_migrations.py

1   import sqlite_utils
2   from click.testing import CliRunner
3   import sqlite_utils
4   from twitter_to_sqlite import cli, migrations
5   
6   from .test_import import zip_contents_path
7   from .test_save_tweets import db, tweets
8   
9   
10  def test_no_migrations_on_first_run(tmpdir, zip_contents_path):
11      output = str(tmpdir / "output.db")
12      args = ["import", output, str(zip_contents_path / "follower.js")]
21  
22  
23  def test_convert_source_column():
24      db = sqlite_utils.Database(memory=True)
25      db["tweets"].insert_all(
51  
52  
53  def test_convert_source_column_against_real_database(db):
54      assert "migrations" not in db.table_names()
55      migrations.convert_source_column(db)

twitter-to-sqlite/tests/test_save_tweets.py

2   import pathlib
3   
4   import pytest
5   import sqlite_utils
6   from twitter_to_sqlite import utils
7   
8   
9   @pytest.fixture
10  def tweets():
11      return json.load(open(pathlib.Path(__file__).parent / "tweets.json"))
12  
13  
14  @pytest.fixture
15  def db(tweets):
16      db = sqlite_utils.Database(memory=True)
19  
20  
21  def test_tables(db):
22      assert {
23          "sources",
52  
53  
54  def test_users(db):
55      user_rows = list(db["users"].rows)
56      assert [
184 
185 
186 def test_tweets(db):
187     tweet_rows = list(db["tweets"].rows)
188     assert [
315 
316 
317 def test_sources(db):
318     source_rows = list(db["sources"].rows)
319     assert [
341 
342 
343 def test_places(db):
344     place_rows = list(db["places"].rows)
345     assert [
359 
360 
361 def test_media(db):
362     media_rows = list(db["media"].rows)
363     media_tweets_rows = list(db["media_tweets"].rows)

twitter-to-sqlite/tests/test_import.py

2   import pathlib
3   
4   import pytest
5   import sqlite_utils
6   from click.testing import CliRunner
7   from twitter_to_sqlite import cli
8   
10  
11  
12  @pytest.fixture
13  def zip_contents_path():
14      return pathlib.Path(__file__).parent / "zip_contents"
15  
16  
17  @pytest.fixture
18  def import_test_zip(tmpdir, zip_contents_path):
19      archive = str(tmpdir / "archive.zip")
20      buf = io.BytesIO()
25  
26  
27  def test_create_zip(zip_contents_path):
28      zf = create_zip(zip_contents_path)
29      assert {
37  
38  
39  def test_cli_import_zip_file(import_test_zip):
40      tmpdir, archive = import_test_zip
41      output = str(tmpdir / "output.db")
42      result = CliRunner().invoke(cli.cli, ["import", output, archive])
46  
47  
48  def test_cli_import_folder(tmpdir, zip_contents_path):
49      output = str(tmpdir / "output.db")
50      result = CliRunner().invoke(cli.cli, ["import", output, str(zip_contents_path)])
54  
55  
56  def test_cli_import_specific_files(tmpdir, zip_contents_path):
57      output = str(tmpdir / "output.db")
58      result = CliRunner().invoke(
109 
110 
111 def test_deletes_existing_archive_tables(import_test_zip):
112     tmpdir, archive = import_test_zip
113     output = str(tmpdir / "output.db")
114     db = sqlite_utils.Database(output)

twitter-to-sqlite/setup.py

37          "python-dateutil",
38      ],
39      extras_require={"test": ["pytest"]},
40      tests_require=["twitter-to-sqlite[test]"],
41  )

todomvc-datasette/js/template.js

20  
21  	var escape = function (string) {
22  		return (string && reHasUnescapedHtml.test(string))
23  			? string.replace(reUnescapedHtml, escapeHtmlChar)
24  			: string;

todomvc-datasette/js/store.js

1   /*jshint eqeqeq:false */
2   // Signed token for the todomvc user on https://latest.datasette.io/
3   // See https://github.com/simonw/todomvc-datasette/issues/2
4   let TOKEN = 'dstok_eyJhIjoidG9kb212YyIsInRva2VuIjoiZHN0b2siLCJ0IjoxNjY5OTQ0NjgzfQ.wzSKl10MouQaKZ0FuTKEftNJko4';
13      let self = this;
14      self._dbName = `todo_${name}`;
15      fetch("https://latest.datasette.io/ephemeral/-/create", {
16        method: "POST",
17        mode: "cors",
48      }
49      fetch(
50        `https://latest.datasette.io/ephemeral/${
51          this._dbName
52        }.json?_shape=array&_sort_desc=id&completed=${
76      callback = callback || function () {};
77      fetch(
78        `https://latest.datasette.io/ephemeral/${self._dbName}.json?_shape=array&_sort_desc=id`,
79        {
80          mode: "cors",
107     if (id) {
108       fetch(
109         `https://latest.datasette.io/ephemeral/${self._dbName}/${id}/-/update`,
110         {
111           method: "POST",
124     } else {
125       // Save it and store ID
126       fetch(`https://latest.datasette.io/ephemeral/${self._dbName}/-/insert`, {
127         method: "POST",
128         mode: "cors",
151   Store.prototype.remove = function (id, callback) {
152     fetch(
153       `https://latest.datasette.io/ephemeral/${this._dbName}/${id}/-/delete`,
154       {
155         method: "POST",

todomvc-datasette/README.md

1   # TodoMVC demo backed by Datasette
2   
3   This is a modified version of the [Vanilla JavaScript TodoMVC Example](https://github.com/tastejs/todomvc/tree/gh-pages/examples/vanillajs) adapted to demonstrate the [Datasette 1.0 alpha JSON API](https://docs.datasette.io/en/latest/changelog.html#a0-2022-11-29).
4   
5   To try this out, visit https://todomvc.datasette.io/

tableau-to-sqlite/tests/test_tableau_to_sqlite.py

1   from click.testing import CliRunner
2   from tableau_to_sqlite.cli import cli
3   import pathlib
9   
10  @vcr.use_cassette(str(fixtures / "cassette.yml"))
11  def test_run(tmpdir):
12      runner = CliRunner()
13      db_path = str(tmpdir / "tableau.db")

tableau-to-sqlite/setup.py

33      """,
34      install_requires=["click", "TableauScraper==0.1.3"],
35      extras_require={"test": ["pytest", "vcrpy"]},
36      tests_require=["tableau-to-sqlite[test]"],
37      python_requires=">=3.6",
38  )

tableau-to-sqlite/README.md

66      pipenv shell
67  
68  Now install the dependencies and test dependencies:
69  
70      pip install -e '.[test]'
71  
72  To run the tests:
73  
74      pytest

til/yaml/yamlfmt.md

9   They suggest this:
10  ```bash
11  go install github.com/google/yamlfmt/cmd/yamlfmt@latest
12  ```
13  This worked on my machine because I had Go installed via Homebrew (`go version go1.20.4 darwin/arm64` according to `go version`).
35      types: [created]
36  jobs:
37    test:
38      runs-on: ubuntu-latest
39      strategy:
40        matrix:
71  
72  jobs:
73    test:
74      runs-on: ubuntu-latest
75      strategy:
76        matrix:

til/typescript/basic-tsc.md

22      % ./node_modules/.bin/tsc --init
23  
24  Next step: create a `.ts` file to start testing it out. I put the following in `greetings.ts`:
25  
26  ```typescript

til/sqlite/list-all-columns-in-a-database.md

3   Here's a devious trick for listing ALL columns in a SQLite database, using a SQL query that generates another SQL query.
4   
5   The first query ([demo](https://latest.datasette.io/fixtures?sql=select+group_concat%28%0D%0A++%22select+%27%22+%7C%7C+name+%7C%7C+%22%27+as+table_name%2C+*+from+pragma_table_info%28%27%22+%7C%7C+name+%7C%7C+%22%27%29%22%0D%0A%2C+%27+union+%27%29+%7C%7C+%27+order+by+table_name%2C+cid%27%0D%0A++from+sqlite_master+where+type+%3D+%27table%27%3B)):
6   
7   ```sql
11    from sqlite_master where type = 'table';
12  ```
13  This outputs the second query, which will look something like this ([demo](https://latest.datasette.io/fixtures?sql=select+%27simple_primary_key%27+as+table_name%2C+*+from+pragma_table_info%28%27simple_primary_key%27%29+union+select+%27primary_key_multiple_columns%27+as+table_name%2C+*+from+pragma_table_info%28%27primary_key_multiple_columns%27%29+union+select+%27primary_key_multiple_columns_explicit_label%27+as+table_name%2C+*+from+pragma_table_info%28%27primary_key_multiple_columns_explicit_label%27%29+union+select+%27compound_primary_key%27+as+table_name%2C+*+from+pragma_table_info%28%27compound_primary_key%27%29+union+select+%27compound_three_primary_keys%27+as+table_name%2C+*+from+pragma_table_info%28%27compound_three_primary_keys%27%29+union+select+%27foreign_key_references%27+as+table_name%2C+*+from+pragma_table_info%28%27foreign_key_references%27%29+union+select+%27sortable%27+as+table_name%2C+*+from+pragma_table_info%28%27sortable%27%29+union+select+%27no_primary_key%27+as+table_name%2C+*+from+pragma_table_info%28%27no_primary_key%27%29+union+select+%27123_starts_with_digits%27+as+table_name%2C+*+from+pragma_table_info%28%27123_starts_with_digits%27%29+union+select+%27Table+With+Space+In+Name%27+as+table_name%2C+*+from+pragma_table_info%28%27Table+With+Space+In+Name%27%29+union+select+%27table%2Fwith%2Fslashes.csv%27+as+table_name%2C+*+from+pragma_table_info%28%27table%2Fwith%2Fslashes.csv%27%29+union+select+%27complex_foreign_keys%27+as+table_name%2C+*+from+pragma_table_info%28%27complex_foreign_keys%27%29+union+select+%27custom_foreign_key_label%27+as+table_name%2C+*+from+pragma_table_info%28%27custom_foreign_key_label%27%29+union+select+%27units%27+as+table_name%2C+*+from+pragma_table_info%28%27units%27%29+union+select+%27tags%27+as+table_name%2C+*+from+pragma_table_info%28%27tags%27%29+union+select+%27searchable%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable%27%29+union+select+%27searchable_tags%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_tags%27%29+union+select+%27searchable_fts%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts%27%29+union+select+%27searchable_fts_content%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts_content%27%29+union+select+%27searchable_fts_segments%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts_segments%27%29+union+select+%27searchable_fts_segdir%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts_segdir%27%29+union+select+%27select%27+as+table_name%2C+*+from+pragma_table_info%28%27select%27%29+union+select+%27infinity%27+as+table_name%2C+*+from+pragma_table_info%28%27infinity%27%29+union+select+%27facet_cities%27+as+table_name%2C+*+from+pragma_table_info%28%27facet_cities%27%29+union+select+%27facetable%27+as+table_name%2C+*+from+pragma_table_info%28%27facetable%27%29+union+select+%27binary_data%27+as+table_name%2C+*+from+pragma_table_info%28%27binary_data%27%29+union+select+%27roadside_attractions%27+as+table_name%2C+*+from+pragma_table_info%28%27roadside_attractions%27%29+union+select+%27attraction_characteristic%27+as+table_name%2C+*+from+pragma_table_info%28%27attraction_characteristic%27%29+union+select+%27roadside_attraction_characteristics%27+as+table_name%2C+*+from+pragma_table_info%28%27roadside_attraction_characteristics%27%29+order+by+table_name%2C+cid)):
14  ```sql
15  select 'simple_primary_key' as table_name, * from pragma_table_info('simple_primary_key') union
156   sqlite_master.name
157 ```
158 [Demo](https://latest.datasette.io/fixtures?sql=select%0D%0A++sqlite_master.name+as+table_name%2C%0D%0A++table_info.*%0D%0Afrom%0D%0A++sqlite_master%0D%0A++join+pragma_table_info%28sqlite_master.name%29+as+table_info%0D%0Aorder+by%0D%0A++sqlite_master.name%2C%0D%0A++table_info.cid).
159 
160 This works with the `pragma_table_info` and `pragma_index_list` and `pragma_foreign_key_list` functions too.
178   columnName
179 ```
180 [Demo](https://latest.datasette.io/fixtures?sql=SELECT+m.name+as+tableName%2C+p.name+as+columnName%0D%0A++++++++FROM+sqlite_master+m%0D%0A++++++++LEFT+OUTER+JOIN+pragma_table_info%28%28m.name%29%29+p+ON+m.name+%3C%3E+p.name%0D%0A++++++++WHERE+m.type+IN+%28%27table%27%2C%27view%27%29+AND+m.name+NOT+LIKE+%27sqlite_%25%27%0D%0A++++++++ORDER+BY+tableName%2C+columnName).
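To run the join-based query outside Datasette, here is a minimal sketch using Python's `sqlite3` module against a throwaway in-memory database (the `dogs` and `owners` tables are placeholders; `pragma_table_info(...)` as a table-valued function needs SQLite 3.16+):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
create table dogs (id integer primary key, name text);
create table owners (id integer primary key, dog_id integer, name text);
""")
sql = """
select
  sqlite_master.name as table_name,
  table_info.*
from
  sqlite_master
  join pragma_table_info(sqlite_master.name) as table_info
order by
  sqlite_master.name,
  table_info.cid
"""
# Each row is (table_name, cid, name, type, notnull, dflt_value, pk)
for row in conn.execute(sql):
    print(row)
```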

til/sqlite/unix-timestamp-milliseconds-sqlite.md

16  )
17  ```
18  [Try these both here](https://latest.datasette.io/_memory?sql=select%0D%0A++strftime%28%27%25s%27%2C+%27now%27%29+as+seconds_since_epoch%2C%0D%0A++cast%28%28julianday%28%27now%27%29+-+2440587.5%29+*+86400+*+1000+as+integer%29+as+ms_since_epoch%3B).
19  
20  In SQLite 3.42.0 and higher you can do this instead:
30  select strftime('%Y-%m-%d %H:%M:%S', :timestamp_ms / 1000, 'unixepoch')
31  ```
32  The output looks like this: `2023-04-09 05:04:24` - [try that out here](https://latest.datasette.io/_memory?sql=select+strftime%28%27%25Y-%25m-%25d+%25H%3A%25M%3A%25S%27%2C+%3Atimestamp_ms+%2F+1000%2C+%27unixepoch%27%29%0D%0A&timestamp_ms=1681016664769).
33  
34  ## Why not multiply seconds by 1000?
86    ) as timestamp_ms
87  ```
88  [Try that here](https://latest.datasette.io/_memory?sql=select%0D%0A++(1000+*+(strftime(%27%25s%27%2C+%27now%27)))+%2B+cast(%0D%0A++++substr(%0D%0A++++++strftime(%27%25f%27%2C+%27now%27)%2C%0D%0A++++++instr(strftime(%27%25f%27%2C+%27now%27)%2C+%27.%27)+%2B+1%0D%0A++++)+as+integer%0D%0A++)+as+timestamp_ms).
89  
90  The `substr('18.413', instr('18.413', '.') + 1)` part returns just the characters after the first `.` character, which are then cast to integer to get the milliseconds fraction of that second. These are added to `1000 * ` the unix timestamp in seconds.
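Here is a sketch that evaluates both millisecond expressions (and the plain seconds value) in one statement using Python's `sqlite3` module, so the results can be compared side by side:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
row = conn.execute("""
select
  strftime('%s', 'now') as seconds_since_epoch,
  cast((julianday('now') - 2440587.5) * 86400 * 1000 as integer) as ms_via_julianday,
  (1000 * strftime('%s', 'now')) + cast(
    substr(
      strftime('%f', 'now'),
      instr(strftime('%f', 'now'), '.') + 1
    ) as integer
  ) as ms_via_substr
""").fetchone()
# 'now' is stable within a single statement, so all three values describe the same instant
print(row)  # e.g. ('1681016664', 1681016664769, 1681016664769)
```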

til/sqlite/track-timestamped-changes-to-a-table.md

75  INSERT INTO foo VALUES ('hello4');
76  ```
77  To test this I ran `sqlite3` (with no arguments, which provides an in-memory database to play with), pasted in the above and then ran this:
78  ```
79  sqlite> .headers on

til/sqlite/text-value-is-integer-or-float.md

17  The `|| '.0'` bit there is needed because `cast('1' as REAL)` returns `1.0`, not just `1`.
18  
19  (Note that `1.200` will not pass this test and will be incorrectly considered an invalid floating point representation)
20  
21  ## Demos
42    )
43  ```
44  [Try that here](https://latest.datasette.io/fixtures?sql=select%0D%0A++value%2C%0D%0A++cast%28cast%28value+AS+REAL%29+AS+TEXT%29+in+%28value%2C+value+%7C%7C+%27.0%27%29+as+is_valid_float%0D%0Afrom%0D%0A++%28%0D%0A++++select%0D%0A++++++%271%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%271.1%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%27dog%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++null+as+value%0D%0A++%29)
45  
46  | value | is_valid_float |
71    )
72  ```
73  [Try that here](https://latest.datasette.io/fixtures?sql=select%0D%0A++value%2C%0D%0A++cast%28cast%28value+AS+INTEGER%29+AS+TEXT%29+%3D+value+as+is_valid_int%0D%0Afrom%0D%0A++%28%0D%0A++++select%0D%0A++++++%271%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%271.1%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%27dog%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++null+as+value%0D%0A++%29)
74  
75  | value | is_valid_int |
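The demo tables above are truncated in this excerpt. Here is a sketch that runs both validity expressions against the same sample values with Python's `sqlite3` module:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
sql = """
select
  value,
  cast(cast(value as REAL) as TEXT) in (value, value || '.0') as is_valid_float,
  cast(cast(value as INTEGER) as TEXT) = value as is_valid_int
from
  (select '1' as value union select '1.1' union select 'dog' union select null)
"""
for row in conn.execute(sql):
    print(row)
# Expect ('1', 1, 1), ('1.1', 1, 0), ('dog', 0, 0) and (None, None, None), in some order
```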

til/sqlite/steampipe.md

17  Let's start with the Hacker News API plugin, [turbot/steampipe-plugin-hackernews](https://github.com/turbot/steampipe-plugin-hackernews). I like this one because it doesn't require an API key.
18  
19  First grab the [latest release](https://github.com/turbot/steampipe-plugin-hackernews/releases/latest) of the extension. I'm on an M2 MacBook so I grabbed the `steampipe_sqlite_hackernews.darwin_arm64.tar.gz` file:
20  
21  ```bash
82  Once you've jumped through the security hoops to enable an extension, it can be used directly with Datasette as well. Let's try two at once - the Hacker News one and the [crt.sh](https://hub.steampipe.io/plugins/turbot/crtsh) plugin for querying certificate transparency logs.
83  
84  Download [the latest steampipe-plugin-crtsh](https://github.com/turbot/steampipe-plugin-crtsh/releases/latest) file - for macOS I used:
85  
86  ```bash

til/sqlite/subqueries-in-select.md

66    10
67  ```
68  [Try that here](https://latest-with-plugins.datasette.io/github?sql=select%0D%0A++repos.full_name%2C%0D%0A++%28%0D%0A++++select%0D%0A++++++max%28created_at%29%0D%0A++++from%0D%0A++++++releases%0D%0A++++where%0D%0A++++++repo+%3D+repos.id%0D%0A++%29+as+max_created_at%2C%0D%0A++%28%0D%0A++++select%0D%0A++++++count%28*%29%0D%0A++++from%0D%0A++++++releases%0D%0A++++where%0D%0A++++++repo+%3D+repos.id%0D%0A++%29+as+releases_count%2C%0D%0A++%28%0D%0A++++select%0D%0A++++++json_group_array%28%0D%0A++++++++json_object%28%0D%0A++++++++++%27id%27%2C%0D%0A++++++++++id%2C%0D%0A++++++++++%27name%27%2C%0D%0A++++++++++name%2C%0D%0A++++++++++%27created_at%27%2C%0D%0A++++++++++created_at%0D%0A++++++++%29%0D%0A++++++%29%0D%0A++++from%0D%0A++++++%28%0D%0A++++++++select%0D%0A++++++++++*%0D%0A++++++++from%0D%0A++++++++++releases%0D%0A++++++++where%0D%0A++++++++++repo+%3D+repos.id%0D%0A++++++++order+by%0D%0A++++++++++created_at+desc%0D%0A++++++++limit%0D%0A++++++++++3%0D%0A++++++%29%0D%0A++%29+as+recent_releases%0D%0Afrom%0D%0A++repos%0D%0Aorder+by%0D%0A++releases_count+desc%0D%0Alimit%0D%0A++10).
69  
70  Here are the first three rows of the output. The `recent_releases` column includes a JSON array with details of the three most recent releases for each of those repositories.
214 limit 10;
215 ```
216 [Try that out here](https://latest-with-plugins.datasette.io/github?sql=with+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max%28releases.created_at%29+over+%28partition+by+repos.id%29+as+max_created_at%2C%0D%0A++++count%28*%29+over+%28partition+by+repos.id%29+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max_created_at%2C%0D%0A++releases_count%2C%0D%0A++json_group_array%28%0D%0A++++json_object%28%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++%29%0D%0A++%29+as+recent_releases%0D%0Afrom+cte%0D%0Awhere+rel_rank+%3C%3D+3%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc%0D%0Alimit+10%3B).
217 
218 The key trick here is the `rank() over` line:
221 rank() over (partition by repos.id order by releases.created_at desc) as rel_rank
222 ```
223 This adds an integer called `rel_rank` to each row in that CTE showing the relative ranking of each release, ordered by their created date. [This version of the query](https://latest-with-plugins.datasette.io/github?sql=with+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max%28releases.created_at%29+over+%28partition+by+repos.id%29+as+max_created_at%2C%0D%0A++++count%28*%29+over+%28partition+by+repos.id%29+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect+*+from+cte) shows the contents of the `cte` table, which starts like this:
224 
225 | full_name                      | max_created_at       |   releases_count |   rel_id | rel_name                                                       | rel_created_at       |   rel_rank |
257     left join releases on releases.repo = repos.id
258 ```
259 This almost has the desired effect, but if [you run it](https://latest-with-plugins.datasette.io/github?sql=with+cte+as+(%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max(releases.created_at)+over+(partition+by+repos.id)+as+max_created_at%2C%0D%0A++++count(*)+over+(partition+by+repos.id)+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank()+over+(partition+by+repos.id+order+by+releases.created_at+desc)+as+rel_rank%0D%0A++from+repos%0D%0A++++left+join+releases+on+releases.repo+%3D+repos.id%0D%0A)%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max_created_at%2C%0D%0A++releases_count%2C%0D%0A++json_group_array(%0D%0A++++json_object(%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++)%0D%0A++)+as+recent_releases%0D%0Afrom+cte%0D%0Awhere+rel_rank+%3C%3D+3%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc) you'll notice that it returns rows that look like this:
260 
261 | full_name                                                            | max_created_at       |   releases_count | recent_releases                                                    |
320 order by releases_count desc
321 ```
322 [Try that finished query here](https://latest-with-plugins.datasette.io/github?sql=%0D%0Awith+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max%28releases.created_at%29+over+%28partition+by+repos.id%29+as+max_created_at%2C%0D%0A++++count%28releases.id%29+over+%28partition+by+repos.id%29+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++left+join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max_created_at%2C%0D%0A++releases_count%2C%0D%0A++json_group_array%28%0D%0A++++json_object%28%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++%29%0D%0A++%29+filter+%28where+rel_id+is+not+null%29+as+recent_releases%0D%0Afrom+cte%0D%0Awhere+rel_rank+%3C%3D+3%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc).
323 
324 ## Simplified to use just one window function
353 order by releases_count desc
354 ```
355 [Try that version here](https://latest-with-plugins.datasette.io/github?sql=%0D%0Awith+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++releases.created_at%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++left+join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max%28created_at%29+as+max_created_at%2C%0D%0A++count%28rel_id%29+as+releases_count%2C%0D%0A++json_group_array%28%0D%0A++++json_object%28%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++%29%0D%0A++%29+filter+%28where+rel_id+is+not+null+and+rel_rank+%3C%3D+3%29+as+recent_releases%0D%0Afrom+cte%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc).
356 
357 The trick here is to select the `created_at` and `releases.id` columns in the initial CTE, then use `max(created_at) as max_created_at` and `count(rel_id) as releases_count` in the second section of the query, in order to total things up based on the group by.
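Here is a self-contained sketch of that final query shape, run with Python's `sqlite3` module against toy `repos` and `releases` tables (window functions need SQLite 3.25+, and `filter (where ...)` on aggregates needs 3.30+):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
create table repos (id integer primary key, full_name text);
create table releases (id integer primary key, repo integer, name text, created_at text);
insert into repos values (1, 'simonw/datasette'), (2, 'simonw/sqlite-utils');
insert into releases values
  (1, 1, '0.1', '2023-01-01'), (2, 1, '0.2', '2023-02-01'),
  (3, 1, '0.3', '2023-03-01'), (4, 1, '0.4', '2023-04-01'),
  (5, 2, '1.0', '2023-01-15');
""")
sql = """
with cte as (
  select
    repos.full_name,
    releases.created_at,
    releases.id as rel_id,
    releases.name as rel_name,
    releases.created_at as rel_created_at,
    rank() over (partition by repos.id order by releases.created_at desc) as rel_rank
  from repos
    left join releases on releases.repo = repos.id
)
select
  full_name,
  max(created_at) as max_created_at,
  count(rel_id) as releases_count,
  json_group_array(
    json_object('id', rel_id, 'name', rel_name, 'created_at', rel_created_at)
  ) filter (where rel_id is not null and rel_rank <= 3) as recent_releases
from cte
group by full_name
order by releases_count desc
"""
# simonw/datasette has four toy releases but recent_releases keeps only the top three by date
for row in conn.execute(sql):
    print(row)
```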

til/sqlite/sqlite-tg.md

128 I decided to try implementing a version of that on top of `sqlite-tg`.
129 
130 I grabbed the latest release of `timezones.geojson.zip` from [evansiroky/timezone-boundary-builder/](https://github.com/evansiroky/timezone-boundary-builder/releases) and unzipped it to get a `combined.geojson` file that starts like this:
131 
132 ```json

til/sqlite/splitting-commas-sqlite.md

25  | x12 | y1234 | z12345 |
26  
27  Here's [a live demo of the query](https://latest.datasette.io/fixtures?sql=with+comma_locations+as+%28%0D%0A++select+instr%28%3Apath%2C+%27%2C%27%29+as+first_comma%2C%0D%0A++instr%28%3Apath%2C+%27%2C%27%29+%2B+instr%28substr%28%3Apath%2C+instr%28%3Apath%2C+%27%2C%27%29+%2B+1%29%2C+%27%2C%27%29+as+second_comma%0D%0A%29%2C+variables+as+%28%0D%0A++select%0D%0A++++substr%28%3Apath%2C+0%2C+first_comma%29+as+first%2C%0D%0A++++substr%28%3Apath%2C+first_comma+%2B+1%2C+second_comma+-+first_comma+-+1%29+as+second%2C%0D%0A++++substr%28%3Apath%2C+second_comma+%2B+1%29+as+third%0D%0A++from+comma_locations%0D%0A%29%0D%0Aselect+*+from+variables&path=x12%2Cy1234%2Cz12345).
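Here is a sketch of the same query run locally with Python's `sqlite3` module, binding the `:path` parameter used in the demo:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
sql = """
with comma_locations as (
  select
    instr(:path, ',') as first_comma,
    instr(:path, ',') + instr(substr(:path, instr(:path, ',') + 1), ',') as second_comma
),
variables as (
  select
    substr(:path, 0, first_comma) as first,
    substr(:path, first_comma + 1, second_comma - first_comma - 1) as second,
    substr(:path, second_comma + 1) as third
  from comma_locations
)
select * from variables
"""
print(conn.execute(sql, {"path": "x12,y1234,z12345"}).fetchone())
# ('x12', 'y1234', 'z12345')
```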

til/sqlite/simple-recursive-cte.md

10  select * from counter limit 5;
11  ```
12  This query [returns five rows](https://latest.datasette.io/_memory?sql=with+recursive+counter%28x%29+as+%28%0D%0A++select+0%0D%0A++++union%0D%0A++select+x+%2B+1+from+counter%0D%0A%29%0D%0Aselect+*+from+counter+limit+10%3B) from a single column `x` - from 0 to 4.
13  
14  |   x |
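The output table is truncated in this excerpt. Here is a sketch of the same recursive CTE run with Python's `sqlite3` module:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
rows = conn.execute("""
with recursive counter(x) as (
  select 0
    union
  select x + 1 from counter
)
select * from counter limit 5
""").fetchall()
print(rows)  # [(0,), (1,), (2,), (3,), (4,)]
```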

til/sqlite/replicating-rqlite.md

5   By default `rqlite` asks you to use its own custom HTTP API - but I wanted to try running it against Datasette. rqlite author Philip O'Toole confirmed that [this should work](https://twitter.com/general_order24/status/1343619601758908419) provided any writes go through the API - each node can be configured to write to an on-disk database file which Datasette can then read from (the default is to use in-memory databases and an on-disk Raft log).
6   
7   Here's how I got that working on my macOS laptop. I used the latest macOS binary from https://github.com/rqlite/rqlite/releases (`rqlite` is written in Go and provides pre-compiled binaries for different systems).
8   
9       cd /tmp

til/sqlite/related-content.md

16  ```sql
17  select title, rank from til_fts where til_fts match '
18    i OR wanted OR to OR run OR some OR django OR tests OR using OR
19    pytestdjango OR and OR with OR configured OR pick OR up OR the
20    OR databaseurl OR environment OR variable OR via OR djdatabaseurl
21    OR against OR a OR postgresql OR server OR running OR in OR
25  ```
26  
27  Here are the results from [that query](https://til.simonwillison.net/tils?sql=select+title%2C+rank+from+til_fts+where+til_fts+match+%27%0D%0A++i+OR+wanted+OR+to+OR+run+OR+some+OR+django+OR+tests+OR+using+OR%0D%0A++pytestdjango+OR+and+OR+with+OR+configured+OR+pick+OR+up+OR+the%0D%0A++OR+databaseurl+OR+environment+OR+variable+OR+via+OR+djdatabaseurl%0D%0A++OR+against+OR+a+OR+postgresql+OR+server+OR+running+OR+in+OR%0D%0A++github+OR+actions+OR+it+OR+took+OR+while+OR+figure+OR+out+OR%0D%0A++right+OR+pattern+OR+trick+OR+was+OR+define+OR+postgres+OR+service%27%0D%0Aorder+by+rank+limit+5). Unsurprisingly the entry itself shows up first, but the other items look relevant enough to me:
28  
29  title | rank
30  -- | --
31  Running tests against PostgreSQL in a service container | -61.04335068286244
32  Talking to a PostgreSQL service container from inside a Docker container | -37.54518907167069
33  Allowing a container in Docker Desktop for Mac to talk to a PostgreSQL server on the host machine | -29.712660785491842
60    5
61  ```
62  And [an example of it running](https://til.simonwillison.net/tils?sql=select%0D%0A++til.topic%2C+til.slug%2C+til.title%2C+til.created%0D%0Afrom%0D%0A++til%0D%0A++join+til_fts+on+til.rowid+%3D+til_fts.rowid%0D%0Awhere%0D%0A++til_fts+match+%3Awords%0D%0A++and+not+(%0D%0A++++til.slug+%3D+%3Aslug%0D%0A++++and+til.topic+%3D+%3Atopic%0D%0A++)%0D%0Aorder+by%0D%0A++til_fts.rank%0D%0Alimit%0D%0A++5&words=i+OR+wanted+OR+to+OR+run+OR+some+OR+django+OR+tests+OR+using+OR+++pytestdjango+OR+and+OR+with+OR+configured+OR+pick+OR+up+OR+the+++OR+databaseurl+OR+environment+OR+variable+OR+via+OR+djdatabaseurl+++OR+against+OR+a+OR+postgresql+OR+server+OR+running+OR+in+OR+++github+OR+actions+OR+it+OR+took+OR+while+OR+figure+OR+out+OR+++right+OR+pattern+OR+trick+OR+was+OR+define+OR+postgres+OR+service&slug=postgresq-service-container&topic=github-actions), which returns the following:
63  
64  topic | slug | title | created
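Here is a sketch of the underlying idea against a throwaway FTS5 table - the titles and bodies are invented stand-ins, and FTS5 must be compiled into your SQLite build:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
create virtual table til_fts using fts5(title, body);
insert into til_fts (title, body) values
  ('Running tests against PostgreSQL in a service container',
   'run django tests against a postgresql server in github actions'),
  ('Talking to a PostgreSQL service container from inside a Docker container',
   'postgresql service container docker github actions'),
  ('Basic strace to see what a process is doing',
   'strace linux debugging');
""")
# Every interesting word from the source document, joined with OR
words = "postgresql OR github OR actions OR service OR container"
for title, rank in conn.execute(
    "select title, rank from til_fts where til_fts match ? order by rank limit 5",
    [words],
):
    print(title, rank)
```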

til/sqlite/pragma-function-list.md

33  select json_extract('{"foo": "bar", "bar": "baz"}', '$.foo', '$.bar') as bits
34  ```
35  This [returns](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%22foo%22%3A+%22bar%22%2C+%22bar%22%3A+%22baz%22%7D%27%2C+%27%24.foo%27%2C+%27%24.bar%27%29+as+bits) a JSON array corresponding to each argument past the first one:
36  
37  ```json
68    pragma_function_list()
69  ```
70  [Run against a Datasette instance](https://latest.datasette.io/_memory?sql=select%0D%0A++name%2C%0D%0A++narg%2C%0D%0A++flags%2C%0D%0A++type%2C%0D%0A++flags+%26+0x000000800+%21%3D+0+as+deterministic%2C%0D%0A++flags+%26+0x000080000+%21%3D+0+as+directonly%2C%0D%0A++flags+%26+0x000100000+%21%3D+0+as+subtype%2C%0D%0A++flags+%26+0x000200000+%21%3D+0+as+innocuous%0D%0Afrom%0D%0A++pragma_function_list%28%29) returns the following data (truncated to the highlights):
71  
72  | name | narg | flags | type | deterministic | directonly | subtype | innocuous |
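The results table is truncated in this excerpt. Here is a sketch that runs the same flag-decoding query locally with Python's `sqlite3` module (`pragma_function_list()` needs a SQLite build with the introspection pragmas enabled, which is the default in recent versions):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
sql = """
select
  name,
  narg,
  flags,
  type,
  flags & 0x000000800 != 0 as deterministic,
  flags & 0x000080000 != 0 as directonly,
  flags & 0x000100000 != 0 as subtype,
  flags & 0x000200000 != 0 as innocuous
from
  pragma_function_list()
order by name
limit 5
"""
for row in conn.execute(sql):
    print(row)
```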

til/sqlite/one-line-csv-operations.md

13        'SELECT passenger_count, COUNT(*), AVG(total_amount) FROM taxi GROUP BY passenger_count'
14  
15  You can get `taxi.csv` by downloading the compressed file from [here](https://github.com/multiprocessio/dsq/blob/43e72ff1d2c871082fed0ae401dd59e2ff9f6cfe/testdata/taxi.csv.7z) and running:
16  
17      7z e -aos taxi.csv.7z

til/sqlite/now-argument-stability.md

18  That `strftime()` pattern is [described here](https://stackoverflow.com/questions/17574784/sqlite-current-timestamp-with-milliseconds/56895050#56895050) and [in this TIL](https://til.simonwillison.net/sqlite/track-timestamped-changes-to-a-table); it returns the current Unix timestamp in milliseconds.
19  
20  The [result of the above query](https://latest.datasette.io/_memory?sql=select+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29+as+t1%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29) is:
21  
22  | t1 |

til/sqlite/ld-preload.md

44      cd SQLite-cf538e27
45  
46  Now we can build the extension. The `CPPFLAGS` are optional but I found I needed them to get the full Datasette test suite to pass later on:
47  
48      CPPFLAGS="-DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS3_PARENTHESIS -DSQLITE_ENABLE_RTREE=1" ./configure
142 ```
143 
144 ## Running the Datasette tests
145 
146 To run Datasette's test suite I needed to install a few extra dependencies:
147 
148 ```
154 source venv/bin/activate
155 pip install wheel # So bdist_wheel works in next step
156 pip install -e '.[test]'
157 LD_PRELOAD=/tmp/SQLite-cf538e27/.libs/libsqlite3.so python3 -c 
158 ```

til/sqlite/json-extract-path.md

32  }
33  ```
34  - `$.creatures` returns the JSON array ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures))
35  - `$.creatures[0].name` returns `Cleo` ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures%5B0%5D.name))
36  - `$.creatures[1]."weight.lb"` returns `1.6` ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures%5B1%5D.%22weight.lb%22))
37  
38  ## \#-1 to access arrays by index from the end
44  But... you can apply a single integer subtraction operation to that `#` - so you can return the name of the last creature in the array using this:
45  
46  - `$.creatures[#-1].name` returns `Azi` ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures%5B%23-1%5D.name))
47  
48  Here's [the commit](https://sqlite.org/src/info/35ed68a651f) that added that custom SQLite extension in 2019.
54  `$.has\" quotes in it`
55  
56  For example ([demo](https://latest.datasette.io/_memory?sql=select+json_extract(%27%7B%0D%0A++++%22has%5C%22+quotes+in+it%22:+%22hello%22%0D%0A%7D%27,+%27$.has%5C%22+quotes+in+it%27)&path=$.has%5C%22+quotes+in+it)):
57  
58  ```sql
65  ## Source code
66  
67  The latest source code for the JSON module can be found in [ext/misc/json.c](https://www3.sqlite.org/src/file?name=ext/misc/json.c) - in particular the `static JsonNode *jsonLookup(...)
68  ` function.
69  
70  The unit tests are really useful - those are spread across these six files:
71  
72  - [test/json1.test](https://www3.sqlite.org/src/file?name=test/json1.test)
73  - [test/json101.test](https://www3.sqlite.org/src/file?name=test/json101.test)
74  - [test/json102.test](https://www3.sqlite.org/src/file?name=test/json102.test)
75  - [test/json103.test](https://www3.sqlite.org/src/file?name=test/json103.test)
76  - [test/json104.test](https://www3.sqlite.org/src/file?name=test/json104.test)
77  - [test/json105.test](https://www3.sqlite.org/src/file?name=test/json105.test) - this one has the tests for `[#]` syntax.
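Here is a sketch that exercises those path examples - including the `[#-1]` syntax and the quoted-key form - using Python's `sqlite3` module against the same JSON document:

```python
import sqlite3

doc = """{
    "creatures": [
        {"name": "Cleo", "species": "dog"},
        {"name": "Azi", "species": "chicken", "weight.lb": 1.6}
    ]
}"""
conn = sqlite3.connect(":memory:")
paths = [
    "$.creatures",            # the whole JSON array
    "$.creatures[0].name",    # 'Cleo'
    '$.creatures[1]."weight.lb"',  # 1.6 - key with a dot needs quotes
    "$.creatures[#-1].name",  # 'Azi' - last element of the array
]
for path in paths:
    print(path, "=>", conn.execute("select json_extract(?, ?)", [doc, path]).fetchone()[0])
```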

til/sqlite/function-list.md

14  select * from pragma_function_list()
15  ```
16  [Here's that for latest.datasette.io](https://latest.datasette.io/fixtures?sql=select+*+from+pragma_function_list%28%29) (truncated):
17  
18  | name                          |   builtin | type   | enc   |   narg |   flags |
30  select "'" || group_concat(name, "', '") || "'" from pragma_function_list()
31  ```
32  Run [against latest.datasette.io](https://latest.datasette.io/fixtures?sql=select+%22%27%22+||+group_concat(name%2C+%22%27%2C+%27%22)+||+%22%27%22+from+pragma_function_list()) this returned the following:
33  
34  `'pow', 'group_concat', 'group_concat', 'json_type', 'json_type', 'julianday', 'ntile', 'nullif', 'sqlite_compileoption_get', 'json_valid', 'json_quote', 'json_patch', '->', 'json_array', 'current_timestamp', 'power', 'sqlite_compileoption_used', 'json_remove', 'json_object', 'json_insert', '->>', 'sin', 'sum', 'quote', 'printf', 'likelihood', 'json_replace', 'json_extract', 'last_value', 'rank', 'sign', 'sqrt', 'sinh', 'tan', 'round', 'round', 'rtrim', 'rtrim', 'nth_value', 'tanh', 'random', 'trim', 'trim', 'time', 'radians', 'trunc', 'total', 'substr', 'substr', 'replace', 'upper', 'subtype', 'typeof', 'load_extension', 'load_extension', 'soundex', 'json_group_array', 'avg', 'abs', 'json_group_object', 'json_array_length', 'json_array_length', 'strftime', 'atan', 'asin', 'acos', 'substring', 'substring', 'randomblob', 'unicode', 'percent_rank', 'row_number', 'atanh', 'asinh', 'acosh', 'cos', 'atan2', 'last_insert_rowid', 'sqlite_log', 'unlikely', 'cosh', 'ceil', 'char', 'unixepoch', 'exp', 'count', 'count', 'date', 'ceiling', 'total_changes', 'changes', 'sqlite_version', 'degrees', 'floor', 'coalesce', 'glob', 'zeroblob', 'hex', 'iif', 'sqlite_source_id', 'format', 'datetime', 'cume_dist', 'ln', 'instr', 'json', 'dense_rank', 'log', 'log', 'ifnull', 'current_date', 'current_time', 'lag', 'lag', 'lag', 'mod', 'log2', 'like', 'like', 'max', 'max', 'min', 'min', 'lead', 'lead', 'lead', 'log10', 'lower', 'ltrim', 'ltrim', 'first_value', 'pi', 'length', 'likely', 'json_set', 'escape_fts', 'prepare_connection_args', 'convert_units', 'sleep', 'rtreedepth', 'match', 'snippet', 'fts5_source_id', 'offsets', 'matchinfo', 'matchinfo', 'optimize', 'rtreecheck', 'rtreenode', 'highlight', 'bm25', 'fts3_tokenizer', 'fts3_tokenizer', 'fts5'`
36  ## Comparing via copy-and-paste
37  
38  To see the functions that were registered for https://sqlite-extension-examples.fly.dev/ but not for https://latest.datasette.io/ I used the above output to construct the following query:
39  
40  `select name from pragma_function_list() where name not in ('pow', 'group_concat', 'group_concat', 'json_type', 'json_type', 'julianday', 'ntile', 'nullif', 'sqlite_compileoption_get', 'json_valid', 'json_quote', 'json_patch', '->', 'json_array', 'current_timestamp', 'power', 'sqlite_compileoption_used', 'json_remove', 'json_object', 'json_insert', '->>', 'sin', 'sum', 'quote', 'printf', 'likelihood', 'json_replace', 'json_extract', 'last_value', 'rank', 'sign', 'sqrt', 'sinh', 'tan', 'round', 'round', 'rtrim', 'rtrim', 'nth_value', 'tanh', 'random', 'trim', 'trim', 'time', 'radians', 'trunc', 'total', 'substr', 'substr', 'replace', 'upper', 'subtype', 'typeof', 'load_extension', 'load_extension', 'soundex', 'json_group_array', 'avg', 'abs', 'json_group_object', 'json_array_length', 'json_array_length', 'strftime', 'atan', 'asin', 'acos', 'substring', 'substring', 'randomblob', 'unicode', 'percent_rank', 'row_number', 'atanh', 'asinh', 'acosh', 'cos', 'atan2', 'last_insert_rowid', 'sqlite_log', 'unlikely', 'cosh', 'ceil', 'char', 'unixepoch', 'exp', 'count', 'count', 'date', 'ceiling', 'total_changes', 'changes', 'sqlite_version', 'degrees', 'floor', 'coalesce', 'glob', 'zeroblob', 'hex', 'iif', 'sqlite_source_id', 'format', 'datetime', 'cume_dist', 'ln', 'instr', 'json', 'dense_rank', 'log', 'log', 'ifnull', 'current_date', 'current_time', 'lag', 'lag', 'lag', 'mod', 'log2', 'like', 'like', 'max', 'max', 'min', 'min', 'lead', 'lead', 'lead', 'log10', 'lower', 'ltrim', 'ltrim', 'first_value', 'pi', 'length', 'likely', 'json_set', 'escape_fts', 'prepare_connection_args', 'convert_units', 'sleep', 'rtreedepth', 'match', 'snippet', 'fts5_source_id', 'offsets', 'matchinfo', 'matchinfo', 'optimize', 'rtreecheck', 'rtreenode', 'highlight', 'bm25', 'fts3_tokenizer', 'fts3_tokenizer', 'fts5')`
41  
42  This [returned](https://sqlite-extension-examples.fly.dev/data?sql=select+name+from+pragma_function_list%28%29+where+name+not+in+%28%27pow%27%2C+%27group_concat%27%2C+%27group_concat%27%2C+%27json_type%27%2C+%27json_type%27%2C+%27julianday%27%2C+%27ntile%27%2C+%27nullif%27%2C+%27sqlite_compileoption_get%27%2C+%27json_valid%27%2C+%27json_quote%27%2C+%27json_patch%27%2C+%27-%3E%27%2C+%27json_array%27%2C+%27current_timestamp%27%2C+%27power%27%2C+%27sqlite_compileoption_used%27%2C+%27json_remove%27%2C+%27json_object%27%2C+%27json_insert%27%2C+%27-%3E%3E%27%2C+%27sin%27%2C+%27sum%27%2C+%27quote%27%2C+%27printf%27%2C+%27likelihood%27%2C+%27json_replace%27%2C+%27json_extract%27%2C+%27last_value%27%2C+%27rank%27%2C+%27sign%27%2C+%27sqrt%27%2C+%27sinh%27%2C+%27tan%27%2C+%27round%27%2C+%27round%27%2C+%27rtrim%27%2C+%27rtrim%27%2C+%27nth_value%27%2C+%27tanh%27%2C+%27random%27%2C+%27trim%27%2C+%27trim%27%2C+%27time%27%2C+%27radians%27%2C+%27trunc%27%2C+%27total%27%2C+%27substr%27%2C+%27substr%27%2C+%27replace%27%2C+%27upper%27%2C+%27subtype%27%2C+%27typeof%27%2C+%27load_extension%27%2C+%27load_extension%27%2C+%27soundex%27%2C+%27json_group_array%27%2C+%27avg%27%2C+%27abs%27%2C+%27json_group_object%27%2C+%27json_array_length%27%2C+%27json_array_length%27%2C+%27strftime%27%2C+%27atan%27%2C+%27asin%27%2C+%27acos%27%2C+%27substring%27%2C+%27substring%27%2C+%27randomblob%27%2C+%27unicode%27%2C+%27percent_rank%27%2C+%27row_number%27%2C+%27atanh%27%2C+%27asinh%27%2C+%27acosh%27%2C+%27cos%27%2C+%27atan2%27%2C+%27last_insert_rowid%27%2C+%27sqlite_log%27%2C+%27unlikely%27%2C+%27cosh%27%2C+%27ceil%27%2C+%27char%27%2C+%27unixepoch%27%2C+%27exp%27%2C+%27count%27%2C+%27count%27%2C+%27date%27%2C+%27ceiling%27%2C+%27total_changes%27%2C+%27changes%27%2C+%27sqlite_version%27%2C+%27degrees%27%2C+%27floor%27%2C+%27coalesce%27%2C+%27glob%27%2C+%27zeroblob%27%2C+%27hex%27%2C+%27iif%27%2C+%27sqlite_source_id%27%2C+%27format%27%2C+%27datetime%27%2C+%27cume_dist%27%2C+%27ln%27%2C+%27instr%27%2C+%27json%27%2C+%27dense_rank%27%2C+%27log%27%2C+%27log%27%2C+%27ifnull%27%2C+%27current_date%27%2C+%27current_time%27%2C+%27lag%27%2C+%27lag%27%2C+%27lag%27%2C+%27mod%27%2C+%27log2%27%2C+%27like%27%2C+%27like%27%2C+%27max%27%2C+%27max%27%2C+%27min%27%2C+%27min%27%2C+%27lead%27%2C+%27lead%27%2C+%27lead%27%2C+%27log10%27%2C+%27lower%27%2C+%27ltrim%27%2C+%27ltrim%27%2C+%27first_value%27%2C+%27pi%27%2C+%27length%27%2C+%27likely%27%2C+%27json_set%27%2C+%27escape_fts%27%2C+%27prepare_connection_args%27%2C+%27convert_units%27%2C+%27sleep%27%2C+%27rtreedepth%27%2C+%27match%27%2C+%27snippet%27%2C+%27fts5_source_id%27%2C+%27offsets%27%2C+%27matchinfo%27%2C+%27matchinfo%27%2C+%27optimize%27%2C+%27rtreecheck%27%2C+%27rtreenode%27%2C+%27highlight%27%2C+%27bm25%27%2C+%27fts3_tokenizer%27%2C+%27fts3_tokenizer%27%2C+%27fts5%27%29) the following list:
43  
44  ```
86  from pragma_function_list()
87  ```
88  [Try that against latest.datasette.io](https://latest.datasette.io/_memory?sql=select+json_group_array%28distinct+name%29%0D%0Afrom+pragma_function_list%28%29)
89  
90  Output:
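The JSON output is truncated in this excerpt. As a local illustration of the same comparison idea, here is a sketch that diffs `pragma_function_list()` between two connections, with a stand-in `convert_units` function registered from Python on only one of them:

```python
import sqlite3


def function_names(conn):
    return {row[0] for row in conn.execute("select distinct name from pragma_function_list()")}


plain = sqlite3.connect(":memory:")
extended = sqlite3.connect(":memory:")
# Stand-in for a function that only one of the two connections provides
extended.create_function("convert_units", 3, lambda value, from_unit, to_unit: value)

print(function_names(extended) - function_names(plain))
# {'convert_units'}
```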

til/sql/consecutive-groups.md

227   start desc;
228 ```
229 Grouping by `country, grp` returns a single row per group. The `min(created)` and `max(created)` functions can then find the earliest and latest dates in each group.
230 
231 I'm using `date()` to turn those `created` timestamps into `YYYY-MM-DD` dates.

til/sqlite/cte-values.md

11  select * from x
12  ```
13  [Try that here](https://latest.datasette.io/fixtures?sql=with+x%28c1%2C+c2%2C+c3%29+as+%28%0D%0A++values%0D%0A++++%28%27a%27%2C+%27b%27%2C+3%29%2C%0D%0A++++%28%27b%27%2C+%27c%27%2C+4%29%0D%0A%29%0D%0Aselect+*+from+x).
14  
15  The output of this query is:
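The output table is truncated in this excerpt. Here is a sketch of the same `values` CTE run with Python's `sqlite3` module:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
rows = conn.execute("""
with x(c1, c2, c3) as (
  values
    ('a', 'b', 3),
    ('b', 'c', 4)
)
select * from x
""").fetchall()
print(rows)  # [('a', 'b', 3), ('b', 'c', 4)]
```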

til/sqlite/cr-sqlite-macos.md

14  
15  1. Install a SQLite version that isn't the macOS default, which doesn't allow loading extensions. I used `brew install sqlite3` for this.
16  2. Download the [latest release](https://github.com/vlcn-io/cr-sqlite/releases) - at time of writing for my M2 Mac that's [v0.15.1/crsqlite-darwin-aarch64.dylib](https://github.com/vlcn-io/cr-sqlite/releases/download/v0.15.1/crsqlite-darwin-aarch64.dylib).
17  3. `cd` to the same folder as the download, then run:
18      ```bash

til/sqlite/column-combinations.md

32    num_rows desc
33  ```
34  [Try that here](https://latest.datasette.io/fixtures?sql=select%0D%0A++++case+when+%5Bpk%5D+is+not+null+then+%27pk%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bcreated%5D+is+not+null+then+%27created%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bplanet_int%5D+is+not+null+then+%27planet_int%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bon_earth%5D+is+not+null+then+%27on_earth%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bstate%5D+is+not+null+then+%27state%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5B_city_id%5D+is+not+null+then+%27_city_id%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5B_neighborhood%5D+is+not+null+then+%27_neighborhood%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Btags%5D+is+not+null+then+%27tags%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bcomplex_array%5D+is+not+null+then+%27complex_array%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bdistinct_some_null%5D+is+not+null+then+%27distinct_some_null%2C+%27+else+%27%27+end%0D%0A++as+columns%2C%0D%0A++count%28*%29+as+num_rows%0D%0Afrom%0D%0A++%5Bfacetable%5D%0D%0Agroup+by%0D%0A++columns%0D%0Aorder+by%0D%0A++num_rows+desc).
35  
36  This has the desired effect: it gives me back all of the combinations of not-null columns in the table, with a count for each one.
53    num_rows desc' as query from pragma_table_info(:table)
54  ```
55  [Try that out](https://latest-with-plugins.datasette.io/fixtures?sql=select+%27select%0D%0A%27+%7C%7C+group_concat%28%27++++case+when+%5B%27+%7C%7C+name+%7C%7C+%27%5D+is+not+null+then+%27+%7C%7C+quote%28name+%7C%7C+%27%2C+%27%29+%7C%7C+%27+else+%27%27%27%27+end%27%2C+%27+%7C%7C%0D%0A%27%29+%7C%7C+%27%0D%0A++as+columns%2C%0D%0A++count%28*%29+as+num_rows%0D%0Afrom%0D%0A++%5B%27+%7C%7C+%3Atable+%7C%7C+%27%5D%0D%0Agroup+by%0D%0A++columns%0D%0Aorder+by%0D%0A++num_rows+desc%27+as+query+from+pragma_table_info%28%3Atable%29&table=facetable) in a demo that includes the [datasette-query-links](https://datasette.io/plugins/datasette-query-links) plugin.
56  This takes `:table` as an input and generates SQL that can then be run to calculate the column-combination counts.
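Here is a sketch of performing that generation step in Python instead, against a stand-in `facetable` created in memory:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
create table facetable (pk integer primary key, planet_int integer, state text, tags text);
insert into facetable (planet_int, state, tags) values
  (1, 'CA', null), (1, null, null), (null, 'MI', '["x"]'), (1, 'CA', null);
""")
table = "facetable"
# pragma_table_info rows are (cid, name, type, notnull, dflt_value, pk)
columns = [row[1] for row in conn.execute("select * from pragma_table_info(:table)", {"table": table})]
cases = " ||\n".join(
    "    case when [{col}] is not null then '{col}, ' else '' end".format(col=col)
    for col in columns
)
sql = (
    "select\n" + cases + "\n  as columns,\n"
    "  count(*) as num_rows\n"
    "from [" + table + "]\n"
    "group by columns\n"
    "order by num_rows desc"
)
# Each row is a combination of not-null columns plus how many rows share it
for row in conn.execute(sql):
    print(row)
```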

til/sqlite/build-specific-sqlite-pysqlite-macos.md

1   # Building a specific version of SQLite with pysqlite on macOS/Linux
2   
3   I wanted the ability to test my Python software against specific version of SQLite on macOS. I found a way to do that using [pysqlite3](https://github.com/coleifer/pysqlite3).
4   
5   First, clone the GitHub mirror of SQLite (so I don't have to learn how to use Fossil):
31      python3 setup.py build_static build
32  
33  The end result sits in a `pysqlite3` folder - on my machine that was `/tmp/pysqlite3/build/lib.macosx-10.15-x86_64-3.9` - test it like this:
34  
35      cd /tmp/pysqlite3/build/lib.macosx-10.15-x86_64-3.9
54      pip install /tmp/pysqlite3/dist/pysqlite3-0.4.6-cp39-cp39-macosx_10_15_x86_64.whl
55  
56  This exact same process works on Linux too (tested inside a default GitHub Actions Linux worker).
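The test commands themselves are truncated in this excerpt. A hypothetical smoke test, assuming the `pysqlite3` module mirrors the standard library `sqlite3` API (which it is designed to do), might look like this:

```python
# Hypothetical check that the freshly built module links the SQLite version you expect
import pysqlite3

print(pysqlite3.sqlite_version)  # version of the SQLite library the module was compiled against
print(pysqlite3.connect(":memory:").execute("select sqlite_version()").fetchone()[0])
```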

til/sphinx/sphinx-autodoc.md

1   # Adding Sphinx autodoc to a project, and configuring Read The Docs to build it
2   
3   I built a [new API reference page](https://sqlite-utils.datasette.io/en/latest/reference.html) today for `sqlite-utils`, using the Sphinx [autodoc extension](https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html) to extract docstrings from the code and use them to build a full class reference.
4   
5   I've avoided this kind of documentation in the past because I think narrative prose is a *much* better way of providing documentation - but ``sqlite-utils`` already has [detailed narrative prose](https://sqlite-utils.datasette.io/en/stable/python-api.html), so I felt that adding reference documentation powered by docstrings could enhance that project - while also providing better inline documentation for tools such as Visual Studio Code and Jupyter.
143 ## The end result
144 
145 The new page of documentation is now live at [en/latest/reference.html](https://sqlite-utils.datasette.io/en/latest/reference.html). The pull request in which I figured this all out is [sqlite-utils/pull/312](https://github.com/simonw/sqlite-utils/pull/312).

til/sphinx/literalinclude-with-markers.md

1   # literalinclude with markers for showing code in documentation
2   
3   I [wanted to include](https://github.com/simonw/datasette/issues/1830) some example Python tests in the Datasette documentation - but since they were tests, I also wanted to execute them as part of my test suite to make sure they worked correctly.
4   
5   I solved this with the Sphinx [literalinclude directive](https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-literalinclude).
6   
7   Here's what I put in my [docs/testing_plugins.rst#L92-L111](https://github.com/simonw/datasette/blob/0f63cb83ed31753a9bd9ec5cc71de16906767337/docs/testing_plugins.rst#L92-L111) file:
8   ```rst
9   A simple test looks like this:
10  
11  .. literalinclude:: ../tests/test_docs.py
12     :language: python
13     :start-after: # -- start test_homepage --
14     :end-before: # -- end test_homepage --
15  
16  Or for a JSON API:
17  
18  .. literalinclude:: ../tests/test_docs.py
19     :language: python
20     :start-after: # -- start test_actor_is_null --
21     :end-before: # -- end test_actor_is_null --
22  
23  To make requests as an authenticated actor, create a signed ``ds_cookie`` using the ``datasette.client.actor_cookie()`` helper function and pass it in ``cookies=`` like this:
24  
25  .. literalinclude:: ../tests/test_docs.py
26     :language: python
27     :start-after: # -- start test_signed_cookie_actor --
28     :end-before: # -- end test_signed_cookie_actor --
29  ```
30  Note that the paths like `../tests/test_docs.py` are relative to the root `docs/` folder, which is a sibling of `tests/`.
31  
32  Then in [tests/test_docs.py](https://github.com/simonw/datasette/blob/0f63cb83ed31753a9bd9ec5cc71de16906767337/tests/test_docs.py#L109-L141):
33  ```python
34  # fmt: off
35  
36  # -- start test_homepage --
37  @pytest.mark.asyncio
38  async def test_homepage():
39      ds = Datasette(memory=True)
40      response = await ds.client.get("/")
41      html = response.text
42      assert "<h1>" in html
43  # -- end test_homepage --
44  
45  # -- start test_actor_is_null --
46  @pytest.mark.asyncio
47  async def test_actor_is_null():
48      ds = Datasette(memory=True)
49      response = await ds.client.get("/-/actor.json")
50      assert response.json() == {"actor": None}
51  # -- end test_actor_is_null --
52  
53  # -- start test_signed_cookie_actor --
54  @pytest.mark.asyncio
55  async def test_signed_cookie_actor():
56      ds = Datasette(memory=True)
57      cookies = {"ds_actor": ds.client.actor_cookie({"id": "root"})}
58      response = await ds.client.get("/-/actor.json", cookies=cookies)
59      assert response.json() == {"actor": {"id": "root"}}
60  # -- end test_signed_cookie_actor --
61  ```
62  The rendered documentation [can be seen here](https://docs.datasette.io/en/latest/testing_plugins.html#using-datasette-client-in-tests).
63  
64  The `# fmt: off` line at the start is an instruction to [Black](https://github.com/psf/black) to ignore that section of the file. Without that, running Black would insist on adding newlines before the closing `# -- end` comments, which would show up [as blank space in the documentation](https://github.com/simonw/datasette/issues/2231).

til/sphinx/blacken-docs.md

23  
24  ```python
25  @pytest.fixture
26  def datasette(tmp_path_factory):
27      # This fixture will be executed repeatedly for every test
28  ```
29  This is because of the missing function body. It turns out adding `...` (which looks prettier than `pass`) fixes this issue:
30  ```python
31  @pytest.fixture
32  def datasette(tmp_path_factory):
33      # This fixture will be executed repeatedly for every test
34      ...
35  ```

til/spatialite/minimal-spatialite-database-in-python.md

1   # Creating a minimal SpatiaLite database with Python
2   
3   When writing a test for [datasette-leaflet-freedraw](https://github.com/simonw/datasette-leaflet-freedraw) I realized I didn't have a simple tiny recipe for creating an in-memory SpatiaLite database in Python. I came up with this:
4   
5   ```python
26  /usr/local/lib/mod_spatialite.dylib
27  ```
28  I also remembered I have this script: [build_small_spatialite_db.py](https://github.com/simonw/datasette/blob/main/tests/build_small_spatialite_db.py)
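The recipe itself is truncated in this excerpt. A minimal sketch of one way to do it, assuming `mod_spatialite` is installed at the path shown above and your Python build allows loading SQLite extensions:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.enable_load_extension(True)  # the macOS system Python may not support this
conn.load_extension("/usr/local/lib/mod_spatialite.dylib")
# Create the SpatiaLite metadata tables in a single transaction
conn.execute("select InitSpatialMetadata(1)")
print(conn.execute("select spatialite_version()").fetchone()[0])
```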

til/spatialite/knn.md

1   # KNN queries with SpatiaLite
2   
3   The latest version of SpatiaLite adds KNN support, which makes it easy to efficiently answer the question "what are the X closest records to this point".
4   
5   The USGS earthquakes GeoJSON is a great dataset for experimenting with these features.
9   `https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.geojson` currently contains 10,642 features.
10  
11  To turn that into a SpatiaLite database using the latest version of [geojson-to-sqlite](https://github.com/simonw/geojson-to-sqlite):
12  ```bash
13  curl 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.geojson' | \

til/shot-scraper/axe-core.md

24  ```json
25  {
26      "testEngine": {
27          "name": "axe-core",
28          "version": "4.7.2"
29      },
30      "testRunner": {
31          "name": "axe"
32      },
33      "testEnvironment": {
34          "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/115.0.5790.75 Safari/537.36",
35          "windowWidth": 1280,

til/selenium/selenium-python-macos.md

92  print(firefox.find_element_by_css_selector('body').text)
93  ```
94  I used `wget` for the download (rather than clicking the link in my browser) thanks to the warning here: https://firefox-source-docs.mozilla.org/testing/geckodriver/Notarization.html
95  
96  An easier option: install it with Homebrew:

til/readthedocs/stable-docs.md

1   # Updating stable docs in ReadTheDocs without pushing a release
2   
3   I use [ReadTheDocs](https://readthedocs.org/) for several of my projects. It's fantastic: among other things, it makes it easy to publish the documentation for my latest `main` branch at `/latest/` and the documentation for my latest release at `/stable/` (as well as maintain archived tag URLs for every prior release). 
4   
5   I can then configure the main page of my project's documentation to redirect to `/stable/` by default.
30  After some [extensive experimentation](https://github.com/simonw/simonw-readthedocs-experiments/issues/1) (that's an issue thread with 43 comments, all by me) I've found a solution.
31  
32  The basic shape is this: rather than having ReadTheDocs serve `/stable/` from the latest tagged release of my project, I instead maintain a `stable` branch in the GitHub repository. It's this branch that becomes the default documentation on my documentation sites.
33  
34  Then I use GitHub Actions to automate the process of updating that branch. In particular:
59  jobs:
60    update_stable_docs:
61      runs-on: ubuntu-latest
62      steps:
63      - name: Checkout repository
214 docs/cli.rst
215 sqlite_utils/cli.py
216 tests/test_cli.py
217 ```
218 
249 Here's the sequence of steps.
250 
251 - Go to the project's admin page, click advanced settings and switch the default version from "stable" to "latest" - then scroll to the very bottom of the page to find the save button. This step is necessary because you can't delete the "stable" version if it is set as the default.
252 - Visit the Versions tab, edit the "stable" version and uncheck the "active" box. This is the same thing as deleting it.
253 - Find the "stable" branch on that page and activate that instead. This will trigger a build of your `stable` branch and cause it to be hosted at `/stable/` on ReadTheDocs.

til/readthedocs/pip-install-docs.md

10      # ...
11      extras_require={
12          "test": ["pytest", "pytest-asyncio", "black", "cogapp", "ruff"],
13          "docs": [
14              "sphinx==7.2.6",

til/readthedocs/link-from-latest-to-stable.md

1   # Linking from /latest/ to /stable/ on Read The Docs
2   
3   [Read The Docs](https://readthedocs.org/) has a handy feature where documentation for older versions will automatically link to the latest release, for example [on this page](https://docs.datasette.io/en/0.56/spatialite.html):
4   
5   <img width="978" alt="A documentation page with a note that says: You are not reading the most recent version of this documentation. 0.60 is the latest version available." src="https://user-images.githubusercontent.com/9599/150437341-14554fe7-1c47-4462-a1d9-9b8d822aaea8.png">
6   
7   That feature is enabled by a "Show version warning" check box in their Advanced Settings preference pane.
9   It's implemented by [this JavaScript](https://github.com/readthedocs/readthedocs.org/blob/0852d7c10d725d954d3e9a93513171baa1116d9f/readthedocs/core/static-src/core/js/doc-embed/version-compare.js#L13-L21) in their default theme, called [from here](https://github.com/readthedocs/readthedocs.org/blob/bc3e147770e5740314a8e8c33fec5d111c850498/readthedocs/core/static-src/core/js/doc-embed/footer.js#L66-L86).
10  
11  I had an extra requirement: I wanted pages on my `/en/latest/` documentation (which shows documentation for the in-development `main` branch on GitHub) to link back to the `/en/stable/` equivalent - but only if that page also existed in the stable documentation.
12  
13  I ended up [adding this snippet](https://github.com/simonw/datasette/commit/ffca55dfd7cc9b53522c2e5a2fa1ff67c9beadf2) of jQuery JavaScript to my custom `docs/_templates/layout.html` template:
18  <script>
19  jQuery(function ($) {
20    // Show banner linking to /stable/ if this is a /latest/ page
21    if (!/\/latest\//.test(location.pathname)) {
22      return;
23    }
24    var stableUrl = location.pathname.replace("/latest/", "/stable/");
25    // Check it's not a 404
26    fetch(stableUrl, { method: "HEAD" }).then((response) => {
47  {% endblock %}
48  ```
49  The neatest piece of this solution is the way it uses an HTTP `HEAD` request via `fetch()` to confirm that the equivalent stable page exists before adding a link to it:
50  ```javascript
51    var stableUrl = location.pathname.replace("/latest/", "/stable/");
52    // Check it's not a 404
53    fetch(stableUrl, { method: "HEAD" }).then((response) => {
56  ```
57  
58  Here's what my fix looks like, running on https://docs.datasette.io/en/latest/csv_export.html
59  
60  <img width="978" alt="This page has a banner that says:  This documentation covers the development version of Datasette. See this page for the current stable release." src="https://user-images.githubusercontent.com/9599/150438021-0ab3db8f-7f65-4846-b2d4-880e10dce79d.png">
62  ## Alternative solution: sphinx-version-warning
63  
64  Just minutes after I committed my fix I was informed of the existence of [sphinx-version-warning](https://sphinx-version-warning.readthedocs.io/en/latest/), a Sphinx plugin that can solve this problem too. There's an example of using that to add a message to the `/latest/` page in [its own documentation configuration here](https://github.com/humitos/sphinx-version-warning/blob/a82156c2ea08e5feab406514d0ccd9d48a345f48/docs/conf.py#L32-L38).
65  
66  ```python
67  # -- Version Warning Banner configuration ------------------------------------
68  versionwarning_messages = {
69      'latest': 'This is a custom message only for version "latest" of this documentation.',
70  }
71  versionwarning_admonition_type = 'tip'
75  I decided to stick with my version, mainly because I like the `fetch()` solution I used.
76  
77  GitHub issue: [Documentation should clarify /stable/ vs /latest/ #1608](https://github.com/simonw/datasette/issues/1608)

til/readthedocs/documentation-seo-canonical.md

3   I was thinking about documentation SEO today. Like many projects, Datasette offers multiple versions of the documentation:
4   
5   - https://docs.datasette.io/en/latest/ is the latest `main` branch on GitHub
6   - https://docs.datasette.io/en/stable/ is the most recent stable (non-alpha or beta) release - currently 0.60
7   - https://docs.datasette.io/en/0.59.4/ is the documentation for that specific version - I have more than 70 of those now
23  <link rel="canonical" href="https://docs.datasette.io/en/stable/introspection.html" />
24  ```
25  Here's [their documentation](https://docs.readthedocs.io/en/latest/custom_domains.html#canonical-urls) covering this feature. I think you need to have configured a "default version" (though they may set a sensible default for that already) - for my project the page for doing that is the Advanced settings page at https://readthedocs.org/dashboard/datasette/advanced/
26  
27  This TIL started life as [a Twitter thread](https://twitter.com/simonw/status/1484287724773203971).

til/python/too-many-open-files-psutil.md

1   # Using psutil to investigate "Too many open files"
2   
3   I was getting this intermittent error running my Datasette test suite:
4   
5       OSError: [Errno 24] Too many open files
7   To figure out what was going on, I used the `psutil` package and its `open_files()` method.
8   
9   Here's the documentation for [psutil.Process.open_files](https://psutil.readthedocs.io/en/latest/#psutil.Process.open_files).
10  
11  I ran `pip install psutil` in my virtual environment.
12  
13  Then I ran `pytest --pdb` to drop into a Python debugger when a test failed.
14  
15  In the debugger I ran this:
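That was likely a snippet along these lines, using psutil's documented `open_files()` API:

```python
import psutil

# List the files currently held open by the test process
psutil.Process().open_files()
```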

til/python/struct-endianness.md

7   ```python
8   def decode_matchinfo(buf): 
9       # buf is a bytestring of unsigned integers, each 4 bytes long 
10      return struct.unpack("I" * (len(buf) // 4), buf) 
11  ```
44  ```python
45  def decode_matchinfo(buf):
46      # buf is a bytestring of unsigned integers, each 4 bytes long
47      return struct.unpack("<" + ("I" * (len(buf) // 4)), buf)
48  ```
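A quick illustration of the fixed version (not from the original TIL), packing two little-endian unsigned integers and decoding them with `decode_matchinfo()`:

```python
import struct

buf = struct.pack("<II", 1, 2)  # two little-endian unsigned 32-bit integers
print(decode_matchinfo(buf))  # (1, 2)
```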

til/python/stdlib-cli-tools.md

42  Then ran `rg`:
43  ```
44  rg 'if __name__ =' -l | grep -v 'test/' \
45    | grep -v 'tests/' | grep -v idlelib | grep -v turtledemo
46  ```
47  The `-l` option causes `ripgrep` to list matching files without showing the context of the match.
52  >
53  > ```bash
54  > rg 'if __name__ =' -l | grep -v -e 'test/' -e 'tests/' -e idlelib -e turtledemo
55  > ```
56  > `grep -v` still means "everything that doesn't match this" - but then the multiple `-e '...'` patterns are used to construct a "this pattern or this pattern or this pattern" filter. This saves on having to pipe through `grep -v` multiple times. Thanks for the tip, [dfc](https://news.ycombinator.com/item?id=36519308).
85  getopt.py
86  dbm/__init__.py
87  doctest.py
88  pickle.py
89  imaplib.py
290 ### nntplib
291 
292 "nntplib built-in demo - display the latest articles in a newsgroup"
293 
294 It defaults to `gmane.comp.python.general`:

til/python/setup-py-from-url.md

7   ```python
8       extras_require={
9           "test": ["pytest", "black", "hypothesis", "cogapp"],
10          "docs": ["furo", "sphinx-autobuild", "codespell", "sphinx-copybutton"],
11          "mypy": [
36  ```python
37      extras_require={
38          "test": ["pytest", "black", "hypothesis", "cogapp"],
39          "docs": [
40              "furo",

til/python/rye.md

1   # A few notes on Rye
2   
3   [Rye](https://github.com/mitsuhiko/rye) is Armin Ronacher's new experimental Python packaging tool. I decided to take it for a test-run.
4   
5   ## Installing Rye

til/python/quick-testing-pyenv.md

1   # Quickly testing code in a different Python version using pyenv
2   
3   I had [a bug](https://github.com/simonw/llm/issues/82#issuecomment-1629735729) that was only showing up in CI against Python 3.8.
4   
5   I used the following pattern with [pyenv](https://github.com/pyenv/pyenv) to quickly run the tests against that specific version.
6   
7   (I had previously installed `pyenv` using `brew install pyenv`.)
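The elided steps install the target Python and create a throwaway virtual environment; a minimal sketch, assuming Python 3.8.17 and the `/tmp/py38env` path used below:

```bash
pyenv install 3.8.17
~/.pyenv/versions/3.8.17/bin/python -m venv /tmp/py38env
```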
58  Then installed my current project into that environment like so:
59  ```bash
60  /tmp/py38env/bin/pip install -e '.[test]'
61  ```
62  Now I can run the tests like this:
63  ```bash
64  /tmp/py38env/bin/pytest
65  ```

til/python/pypy-macos.md

44      sys.exit(cli())
45  ```
46  Then I tested an import against a large CSV file like so:
47  ```
48  (venv) /tmp % time sqlite-utils insert pypy.db t /tmp/en.openfoodfacts.org.products.csv --csv

til/python/pyproject.md

135 ## It finds the Python files for you
136 
137 With `setup.py` I'm used to putting quite a bit of effort into telling Python which files should be included in the package - and making sure it doesn't include the `tests/` and `docs/` folders.
138 
139 As far as I can tell, the default behaviour now is to find all `*.py` files and all `*/*.py` files and include those - but to exclude common patterns such as `tests/` and `docs/` and `tests.py` and `test_*.py`.
140 
141 This behaviour is defined by `setuptools`. The [Automatic Discovery](https://setuptools.pypa.io/en/latest/userguide/package_discovery.html#automatic-discovery) section of the `setuptools` documentation describes these rules in detail.
142 
143 ## Adding metadata
203 ## Test dependencies
204 
205 I like being able to run `pip install -e '.[test]'` to install test dependencies - things like `pytest`, which are needed to run the project tests but shouldn't be bundled with the project itself when it is installed.
206 
207 Those can be added in a section like this:
209 ```toml
210 [project.optional-dependencies]
211 test = ["pytest"]
212 ```
213 I added that to my `/tmp/demo-package/pyproject.toml` file, then ran this in my elsewhere virtual environment:
214 ```bash
215 pip install -e '/tmp/demo-package[test]'
216 ```
217 The result was an installation of `pytest`, visible when I ran `pip freeze`.
218 
219 ## Package data

til/python/pip-tools.md

14  psycopg2-binary
15  dj-database-url
16  pytest-django
17  django-extensions
18  django-htmx
21  httpx
22  sentry-sdk
23  pytest-httpx
24  ics==0.7
25  ```
28  > `django.utils.deprecation.RemovedInDjango41Warning: 'django_extensions' defines default_app_config = 'django_extensions.apps.DjangoExtensionsConfig'.`
29  
30  I'm pinning `ics` to the exact version 0.7 due to a broken test I experienced with more recent versions.
31  
32  ## Compiling requirements.txt from requirements.in
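The basic command is this (a minimal sketch - the original recipe may pass extra options):

```bash
pip-compile requirements.in
```

This writes a fully pinned `requirements.txt` alongside the `requirements.in` file.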

til/python/pdb-interact.md

3   Today [Carlton told me](https://twitter.com/carltongibson/status/1587155176590385159) about the [interact command](https://docs.python.org/3.10/library/pdb.html#pdbcommand-interact) in the Python debugger.
4   
5   Here's how to use it with `pytest` (but it works anywhere else where you find yourself in a `pdb` session).
6   
7   Use `pytest --pdb` to cause `pytest` to open a debugger at the first failed assertion (I added `assert False` to my test suite to demonstrate this).
8   
9   Then type `interact` to drop into a full Python interactive prompt that keeps all of the local and global variables from the debugger:
10  
11  ```
12  % pytest -k test_drop --pdb                                               
13  ======== test session starts ========
14  platform darwin -- Python 3.10.3, pytest-7.1.3, pluggy-1.0.0
15  ...
16  >       assert False
17  E       assert False
18  
19  tests/test_api_write.py:272: AssertionError
20  >>>> entering PDB >>>>
21  
22  >>>> PDB post_mortem (IO-capturing turned off) >>>>
23  > /Users/simon/Dropbox/Development/datasette/tests/test_api_write.py(272)test_drop_table()
24  -> assert False
25  (Pdb) interact
26  >>> locals().keys()
27  dict_keys(['__name__', '__doc__', '__package__', '__loader__', '__spec__', '__file__', '__cached__', '__builtins__',
28    '@py_builtins', '@pytest_ar', 'Datasette', 'sqlite3', 'pytest', 'time', 'ds_write', 'write_token', 'test_write_row',
29    'test_write_rows', 'test_write_row_errors', 'test_delete_row', 'test_drop_table', 'scenario', 'token', 'should_work',
30    'path', 'response', '@py_assert0', '@py_format2'])
31  ```

til/python/packaging-pyinstaller.md

67  I solved this by adding each `ModuleNotFoundError` module to `--hidden-import` until it worked.
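A sketch of what that looks like on the command line - the script and module names here are purely illustrative, not the actual list I needed:

```bash
pyinstaller -F cli.py \
    --hidden-import some_missing_module \
    --hidden-import another_missing_module
```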
68  
69  I've tested this script (and the generated executables) on both macOS and Ubuntu Linux so far, and it's worked perfectly in both cases. See [issue 93](https://github.com/simonw/datasette/issues/93) for more details.

til/python/os-remove-windows.md

1   # os.remove() on Windows fails if the file is already open
2   
3   I puzzled over this one for [quite a while](https://github.com/simonw/sqlite-utils/issues/503) this morning. I had this test that was failing with Windows on Python 3.11:
4   
5   ```python
6   @pytest.mark.parametrize(
7       "use_path,file_exists", [(True, True), (True, False), (False, True), (False, False)]
8   )
9   def test_recreate(tmpdir, use_path, file_exists):
10      filepath = str(tmpdir / "data.db")
11      if use_path:
17      assert ["t2"] == Database(filepath).table_names()
18  ```
19  The test checks that the `recreate=True` option to my `Database()` constructor deletes and re-creates the file.
20  
21  Here's [the implementation](https://github.com/simonw/sqlite-utils/blob/9cbe19ac0547031f3b626d9d18ef05c0d193bf79/sqlite_utils/db.py#L320-L323) of that `recreate=True` option:
28  On Windows I was getting the following exception:
29  ```
30  FAILED tests/test_recreate.py::test_recreate[True-True] - 
31    PermissionError: [WinError 32] The process cannot access the file because it is being used by another process:
32    'C:\\Users\\runneradmin\\AppData\\Local\\Temp\\pytest-of-runneradmin\\pytest-0\\test_recreate_True_True_0\\data.db'
33  ```
34  Eventually I spotted the problem: my call on this line was opening a SQLite connection to the `data.db` file:
39  But it wasn't explicitly closing the SQLite connection. It turns out that leaves the database file open - and since the file is still open Windows raised an exception when `os.remove()` was called against it.
40  
41  I fixed the error by closing the SQLite3 connection in my test, like this:
42  
43  ```python
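# (hedged sketch of the relevant part of the fixed test -
#  the real change in sqlite-utils may differ)
db = Database(filepath)
db["t"].insert({"foo": 1})
db.conn.close()  # explicitly close the connection so Windows can delete the file
```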

til/python/locust.md

1   # Simple load testing with Locust
2   
3   I've been using [Locust](https://locust.io/) recently to run some load tests - most significantly [these tests](https://github.com/simonw/django_sqlite_benchmark/issues?q=is%3Aissue+is%3Aclosed) against SQLite running with Django and [this test](https://github.com/simonw/datasette-gunicorn/issues/1) exercising Datasette and Gunicorn.
4   
5   ## A really basic test
6   
7   Locust tests are defined in a `locustfile.py` file. Here's the most basic possible test, which sends requests to the `/` page of a web application:
8   
9   ```python
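# A minimal sketch of such a locustfile (the one in the original TIL may differ)
from locust import HttpUser, task


class Website(HttpUser):
    @task
    def index(self):
        # Request the root page of whatever host the test is pointed at
        self.client.get("/")
```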
22      locust
23  
24  This opens a web server on http://0.0.0.0:8089/ (by default) which offers an interface for starting a new test:
25  
26  <img width="484" alt="A form with fields for number of users (peak concurrency), spawn rate (users started/second) and host (the URL to start the tests at)" src="https://user-images.githubusercontent.com/9599/197367313-8b4c85b9-cd0e-45ed-adc2-789116687cb5.png">
27  
28  You can run this for as long as you like, and it will produce both statistics on the load test and some pleasing charts:
29  
30  <img width="1049" alt="Charts for total requests per second over time and response times in ms" src="https://user-images.githubusercontent.com/9599/197367351-03735a41-00cb-4bbf-a13d-cb22bbaf6e3b.png">
32  ## Using the command-line
33  
34  You can also run tests without the web server at all. I tend to use this option as it's quicker to repeat a test, and you can easily copy and paste the results into a GitHub issue thread.
35  
36      locust --headless --users 4 --spawn-rate 2 -H http://127.0.0.1:8001
37  
38  This runs the tests in the current `locustfile.py` against `http://127.0.0.1:8001`, with four concurrent users and ramping up at 2 users every second (so taking two seconds to ramp up to full concurrency).
39  
40  Hit `Ctrl+C` to end the test. It will end up producing something like this:
41  
42  ```
55  ```
56  
57  ## More complicated tests
58  
59  Locust tests can get a lot more complex than this. The documentation [provides this example](https://docs.locust.io/en/stable/writing-a-locustfile.html):
60  
61  ```python

til/python/introspect-function-parameters.md

18  ```
19  
20  And here's an illustrative unit test:
21  
22  ```python
23  def test_call_with_supported_arguments():
24      def foo(a, b):
25          return "{}+{}".format(a, b)
28      assert "1+2" == utils.call_with_supported_arguments(foo, a=1, b=2, c=3)
29  
30      with pytest.raises(TypeError):
31          utils.call_with_supported_arguments(foo, a=1)
32  ```
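For reference, a minimal sketch of how a helper like this can be built on `inspect.signature()` - an illustration, not Datasette's actual implementation:

```python
import inspect


def call_with_supported_arguments(fn, **kwargs):
    # Only pass along the keyword arguments that fn actually declares
    call_with = {}
    for name in inspect.signature(fn).parameters:
        if name not in kwargs:
            raise TypeError("{} requires parameter {}".format(fn, name))
        call_with[name] = kwargs[name]
    return fn(**call_with)
```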

til/python/installing-upgrading-plugins-with-pipx.md

56      ]
57  
58  I added all of this to the Datasette docs here: https://docs.datasette.io/en/latest/installation.html#using-pipx
59  (see https://github.com/simonw/datasette/issues/756).

til/python/generate-nested-json-summary.md

1   # Generated a summary of nested JSON data
2   
3   I was trying to figure out the shape of the JSON object from https://github.com/simonw/coronavirus-data-gov-archive/blob/master/data_latest.json?raw=true - which is 3.2MB and heavily nested, so it's difficult to get a good feel for the shape.
4   
5   I solved this with a Python `summarize()` function which recursively truncates the nested lists and dictionaries.
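The function itself is elided from this excerpt; here's a minimal sketch of the idea, matching the `summarize(data, list_limit=..., key_limit=...)` signature used below:

```python
def summarize(data, list_limit=5, key_limit=10):
    # Recursively truncate lists and dictionaries so the overall shape stays visible
    if isinstance(data, dict):
        return {
            key: summarize(value, list_limit, key_limit)
            for key, value in list(data.items())[:key_limit]
        }
    if isinstance(data, list):
        return [summarize(item, list_limit, key_limit) for item in data[:list_limit]]
    return data
```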
28  import json, requests
29  data = requests.get(
30      "https://github.com/simonw/coronavirus-data-gov-archive/blob/master/data_latest.json?raw=true"
31  ).json()
32  print(json.dumps(summarize(data, list_limit=2, key_limit=7), indent=4))

til/python/cog-to-update-help-in-readme.md

11  import cog
12  from csvs_to_sqlite import cli
13  from click.testing import CliRunner
14  runner = CliRunner()
15  result = runner.invoke(cli.cli, ["--help"])
37  ## Testing with cog --check
38  
39  A version of Cog released after I first wrote this TIL added a new `--check` option, so you can run a test in CI to check if the file needs to be updated using:
40  
41      cog --check README.md
42  
43  ## Writing a test (before cog --check)
44  
45  Any time I generate content like this in a repo I like to include a test that will fail if I forget to update the content.
46  
47  `cog` clearly isn't designed to be used as an independent library, but I came up with the following `pytest` test pattern, which works well, in my `tests/test_csvs_to_sqlite.py` module:
48  
49  ```python
54  
55  
56  def test_if_cog_needs_to_be_run():
57      _stdout = sys.stdout
58      sys.stdout = StringIO()
77  Cog then writes the generated output to `stdout` - which I capture with that `sys.stdout` trick.
78  
79  Finally, I compare the generated output to the current file content and fail the test with a reminder to run `cog -r` if they do not match.
80  
81  ## Cog for reStructuredText

til/python/codespell.md

51  jobs:
52    spellcheck:
53      runs-on: ubuntu-latest
54      steps:
55      - uses: actions/checkout@v2

til/python/callable.md

33  `check_callable(obj)` returns a `CallableStatus` named tuple, with an `is_callable` boolean saying if it can be called and an `is_async_callable` boolean specifying if you need to use `await` with it.
34  
35  I wrote these `pytest` tests to exercise the `check_callable()` function:
36  
37  ```python
38  import pytest
39  
40  
61  
62  
63  @pytest.mark.parametrize(
64      "obj,expected_is_callable,expected_is_async_callable",
65      (
77      ),
78  )
79  def test_check_callable(obj, expected_is_callable, expected_is_async_callable):
80      status = check_callable(obj)
81      assert status.is_callable == expected_is_callable

til/python/call-pip-programatically.md

3   I needed this for the `datasette install` and `datasette uninstall` commands, see [issue #925](https://github.com/simonw/datasette/issues/925).
4   
5   My initial attempt at this resulted in weird testing errors ([#928](https://github.com/simonw/datasette/issues/928)) - while investigating them I stumbled across [this comment](https://github.com/pypa/pip/blob/e060970d51c5946beac8447eb95585d83019582d/src/pip/_internal/cli/main.py#L23-L47) in the `pip` source code:
6   
7   ```
41       run_module("pip", run_name="__main__") 
42  ```
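The overall technique - a minimal sketch, not the exact Datasette code - is to set `sys.argv` and then run the `pip` module as if it had been invoked from the command line:

```python
import sys
from runpy import run_module


def run_pip(args):
    # e.g. args = ["install", "datasette-vega"]
    sys.argv = ["pip"] + args
    run_module("pip", run_name="__main__")
```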
43  And here's how I wrote [a unit test](https://github.com/simonw/datasette/blob/afdeda8216d4d3027f87583ccdbef17ad85022ef/tests/test_cli.py#L114-L124) for it:
44  ```python
45  @mock.patch("datasette.cli.run_module")
46  def test_install(run_module):
47      runner = CliRunner()
48      runner.invoke(cli, ["install", "datasette-mock-plugin", "datasette-mock-plugin2"])

til/pytest/treat-warnings-as-errors.md

1   # Treating warnings as errors in pytest
2   
3   I was seeing this warning in a Django project when I thought I was correctly using timezone-aware dates everywhere:
5   > RuntimeWarning: DateTimeField Shift.shift_start received a naive datetime (2022-04-01 00:00:00) while time zone support is active
6   
7   Running `pytest -Werror` turns those warnings into errors that fail the tests.
8   
9   Which means you can investigate them in the Python debugger by running:
10  
11      pytest -Werror --pdb -x
12  
13  The `--pdb` starts the debugger at the warning (now error) and the `-x` stops the tests after the first failure.
14  
15  ## In pytest.ini
16  
17  You can also set this in `pytest.ini` - useful if you want ALL warnings to be failures in both development and CI.
18  
19  Add the following to the `pytest.ini` file:
20  
21  ```ini
22  [pytest]
23  # ...
24  filterwarnings =
31  
32  ```ini
33  [pytest]
34  # ...
35  filterwarnings =

til/pytest/test-click-app-with-streaming-input.md

1   # Testing a Click app with streaming input
2   
3   For [sqlite-utils#364](https://github.com/simonw/sqlite-utils/issues/364) I needed to write a test for a [Click](https://click.palletsprojects.com/) app which dealt with input streamed to standard input. I needed to run some assertions during that process, which ruled out the usual [CliRunner.invoke()](https://click.palletsprojects.com/en/8.0.x/testing/) testing tool since that works by running the command until completion.
4   
5   I decided to use `subprocess` to run the application. Here's the pattern I came up with for the test:
6   ```python
7   def test_insert_streaming_batch_size_1(db_path):
8       # https://github.com/simonw/sqlite-utils/issues/364
9       # Streaming with --batch-size 1 should commit on each record
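    # (hedged sketch of the rest of the test - the real one in sqlite-utils differs;
    #  assumes "import subprocess" and a db_path fixture)
    proc = subprocess.Popen(
        [
            "sqlite-utils", "insert", str(db_path), "rows", "-",
            "--nl", "--batch-size", "1",
        ],
        stdin=subprocess.PIPE,
    )
    # Write one newline-delimited JSON record, then flush so the subprocess sees it immediately
    proc.stdin.write(b'{"name": "Azi"}\n')
    proc.stdin.flush()
    # ... assert the record has been committed to the database here ...
    proc.stdin.close()  # equivalent to sending end-of-file
    proc.wait()
```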
46  I realized I needed to call `proc.stdin.flush()` after each write to ensure the write was pushed to the process in a predictable manner.
47  
48  At the end of the test, running `proc.stdin.close()` is equivalent to sending an end-of-file, then `proc.wait()` ensures the process has finished and terminated.

til/pytest/syrupy.md

1   # Snapshot testing with Syrupy
2   
3   I'm a big fan of snapshot testing - writing tests where you compare the output of some function to a previously saved version, and can re-generate that version from scratch any time something changes.
4   
5   I usually do this by hand - I run `pytest -x --pdb` to stop at the first failing test and drop into a debugger, then copy out the representation of the generated value and copy it into the test. I wrote about how I use this pattern a few years ago in [How to cheat at unit tests with pytest and Black](https://simonwillison.net/2020/Feb/11/cheating-at-unit-tests-pytest-black/).
6   
7   Today I learned how to do the same thing with the [Syrupy](https://github.com/tophat/syrupy) plugin for [pytest](https://docs.pytest.org/). I think I'll be using this for many of my future projects.
8   
9   ## Some initial tests
10  
11  I created a `tests/test_stuff.py` file with the following contents:
12  
13  ```python
14  def test_one(snapshot):
15      assert "hello" == snapshot
16  
17  
18  def test_two(snapshot):
19      assert snapshot == {"foo": [1, 2, 3], "bar": {"baz": "qux"}}
20  ```
21  Then I installed both `pytest` and `syrupy`:
22  
23  ```bash
24  pip install pytest syrupy
25  ```
26  Now in my parent folder I can run this:
27  ```bash
28  pytest
29  ```
30  And the tests fail:
31  ```
32  tests/test_stuff.py FF                                                              [100%]
33  
34  ======================================== FAILURES =========================================
35  ________________________________________ test_one _________________________________________
36  
37  snapshot = SnapshotAssertion(name='snapshot', num_executions=1)
38  
39      def test_one(snapshot):
40  >       assert "hello" == snapshot
41  E       AssertionError: assert [+ received] == [- snapshot]
42  E         Snapshot 'test_one' does not exist!
43  E         + 'hello'
44  
45  tests/test_stuff.py:2: AssertionError
46  ________________________________________ test_two _________________________________________
47  
48  snapshot = SnapshotAssertion(name='snapshot', num_executions=1)
49  
50      def test_two(snapshot):
51  >       assert snapshot == {"foo": [1, 2, 3], "bar": {"baz": "qux"}}
52  E       AssertionError: assert [- snapshot] == [+ received]
53  E         Snapshot 'test_two' does not exist!
54  E         + dict({
55  E         +   'bar': 
57  E         ...Full output truncated (9 lines hidden), use '-vv' to show
58  
59  tests/test_stuff.py:5: AssertionError
60  --------------------------------- snapshot report summary ---------------------------------
61  2 snapshots failed.
62  ================================= short test summary info =================================
63  FAILED tests/test_stuff.py::test_one - AssertionError: assert [+ received] == [- snapshot]
64  FAILED tests/test_stuff.py::test_two - AssertionError: assert [- snapshot] == [+ received]
65  ==================================== 2 failed in 0.05s ====================================
66  ```
67  The snapshots don't exist yet. But I can create them automatically by running this:
68  ```bash
69  pytest --snapshot-update
70  ```
71  Which outputs passing tests along with:
72  ```
73  --------------------------------- snapshot report summary ---------------------------------
75  ==================================== 2 passed in 0.01s ====================================
76  ```
77  And sure enough, there's now a new folder called `tests/__snapshots__` with a file called `test_stuff.ambr` which contains this:
78  ```
79  # serializer version: 1
80  # name: test_one
81    'hello'
82  # ---
83  # name: test_two
84    dict({
85      'bar': dict({
94  # ---
95  ```
96  Running `pytest` again passes, because the snapshots exist and continue to match the test output.
97  
98  The serialized snapshot format is designed to be checked into Git. It's pleasantly readable - I can review it and see what it's testing, and I could even update it by hand - though I'm much more likely to use the `--snapshot-update` flag and then eyeball the differences.
99  
100 ## Adding a dataclass
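The dataclass definition is elided in this excerpt; judging by the snapshot output below it looks something like this:

```python
from dataclasses import dataclass


@dataclass
class Foo:
    bar: int
    baz: str
```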
112 
113 
114 def test_one(snapshot):
115     assert "hello" == snapshot
116 
117 
118 def test_two(snapshot):
119     assert snapshot == {"foo": [1, 2, 3], "bar": {"baz": "qux"}}
120 
121 
122 def test_three(snapshot):
123     assert Foo(1, "hello") == snapshot
124 ```
125 Running `pytest` again failed. `pytest --snapshot-update` passed and updated my snapshot file, adding this to it:
126 ```
127 # name: test_three
128   Foo(bar=1, baz='hello')
129 ```
138         self.bar = bar
139 
140 def test_four(snapshot):
141     assert WeirdClass(1, 2) == snapshot
142 ```
143 Serialized to:
144 ```
145 # name: test_four
146   WeirdClass(
147     bar=2,

til/pytest/subprocess-server.md

1   # Start a server in a subprocess during a pytest session
2   
3   I wanted to start an actual server process, run it for the duration of my pytest session and shut it down at the end.
4   
5   Here's the recipe I came up with. This fixture lives in `conftest.py`:
6   
7   ```python
8   import pytest
9   import sqlite_utils
10  import subprocess
11  
12  @pytest.fixture(scope="session")
13  def ds_server(tmp_path_factory):
14      db_directory = tmp_path_factory.mktemp("dbs")
15      db_path = db_directory / "test.db"
16      db = sqlite_utils.Database(db_path)
17      insert_test_data(db)
18      ds_proc = subprocess.Popen(
19          [
31      assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8")
32      yield ds_proc
33      # Shut it down at the end of the pytest session
34      ds_proc.terminate()
35  ```
36  A test looks like this:
37  ```python
38  import httpx
39  
40  def test_server_starts(ds_server):
41      response = httpx.get("http://127.0.0.1:8041/")
42      assert response.status_code == 200
45  ## Alternative recipe for serving static files
46  
47  While [adding tests to Datasette Lite](https://github.com/simonw/datasette-lite/issues/35) I found myself needing to run a localhost server that served static files directly.
48  
49  I completely forgot about this TIL, and instead took inspiration [from pytest-simplehttpserver](https://github.com/ppmdo/pytest-simplehttpserver/blob/a82ad31912121c074ff1a76c4628a1c42c32b41b/src/pytest_simplehttpserver/pytest_plugin.py#L17-L28) - coming up with this pattern:
50  
51  ```python
52  from subprocess import Popen, PIPE
53  import pathlib
54  import pytest
55  import time
56  from http.client import HTTPConnection
59  
60  
61  @pytest.fixture(scope="module")
62  def static_server():
63      process = Popen(
85  Again, including `static_server` as a fixture is enough to ensure requests to `http://localhost:8123/` will be served by that temporary server.
86  
87  I like how this version polls for a successful HEAD request (a trick inspired by `pytest-simplehttpserver`) rather than just sleeping.
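For reference, a minimal sketch of that polling idea (names and timings here are illustrative, not the exact code from the fixture):

```python
import time
from http.client import HTTPConnection


def wait_for_server(host="localhost", port=8123, retries=10):
    # Issue HEAD requests until the server answers, instead of sleeping for a fixed time
    while retries > 0:
        conn = HTTPConnection(host, port)
        try:
            conn.request("HEAD", "/")
            if conn.getresponse() is not None:
                return
        except ConnectionRefusedError:
            time.sleep(0.1)
            retries -= 1
    raise RuntimeError("Static server never came up")
```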

til/pytest/show-files-opened-by-tests.md

1   # Show files opened by pytest tests
2   
3   My test suite for [Datasette](https://github.com/simonw/datasette) has grown so large that running the whole thing sometimes causes me to run out of file handles.
4   
5   I've not solved this yet, but I did figure out a pattern to get `pytest` to show me which new files were opened by which tests.
6   
7   Add the following to `conftest.py`:
8   
9   ```
11  
12  
13  @pytest.fixture(autouse=True)
14  def check_for_new_file_handles(request):
15      proc = psutil.Process()
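    # (hedged sketch of the elided lines, based on the description below)
    before = set(proc.open_files())
    yield
    after = set(proc.open_files())
    new_files = [f for f in after if f not in before]
    if new_files:
        print("{} opened by {}".format(new_files, request.node))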
22  ```
23  
24  This uses [psutil](https://pypi.org/project/psutil/) (`pip install psutil`) to build a set of the open files before and after the test runs. It then uses a list comprehension to figure out which file handles are new.
25  
26  Using `@pytest.fixture(autouse=True)` means it will automatically be used for every test.
27  
28  It's a `yield` fixture, which means the part of the code before the `yield` statement runs before the test, then the part afterwards runs after the test function has finished.
29  
30  Accepting the `request` argument means it gets access to a `pytest` request object, which includes `request.node` which is an object representing the test that is being executed.
31  
32  You need to run `pytest -s` to see the output (without the `-s` the output is hidden).
33  
34  Example output:
35    
36  `tests/test_permissions.py .[popenfile(path='/private/var/folders/wr/hn3206rs1yzgq3r49bz8nvnh0000gn/T/tmp76w2ukin/fixtures.db', fd=8)] opened by <Function test_view_padlock[/-None-200-200]>..`

til/pytest/session-scoped-tmp.md

1   # Session-scoped temporary directories in pytest
2   
3   I habitually use the `tmpdir` fixture in pytest to get a temporary directory that will be cleaned up after each test, but that doesn't work with `scope="session"` - which can be used to ensure an expensive fixture is run only once per test session and the generated content is used for multiple tests.
4   
5   To get a temporary directory that works with `scope="session"`, use the `tmp_path_factory` built-in pytest fixture like this:
6   
7   ```python
8   import pytest
9   
10  
11  @pytest.fixture(scope="session")
12  def template_dir(tmp_path_factory):
13      template_dir = tmp_path_factory.mktemp("page-templates")
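    # (hedged sketch of the elided setup, inferred from the tests below)
    pages_dir = template_dir / "pages"
    pages_dir.mkdir()
    (pages_dir / "about.html").write_text("ABOUT!")
    (pages_dir / "request.html").write_text("request")
    return template_dir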
19  
20  
21  def test_about(template_dir):
22      assert "ABOUT!" == (template_dir / "pages" / "about.html").read_text()
23  
24  
25  def test_request(template_dir):
26      assert "request" == (template_dir / "pages" / "request.html").read_text()
27  ```
28  
29  Example: https://github.com/simonw/datasette/blob/1b7b66c465e44025ec73421bd69752e42f108321/tests/test_custom_pages.py#L16-L45

til/pytest/registering-plugins-in-tests.md

1   # Registering temporary pluggy plugins inside tests
2   
3   While implementing more finely-grained permissions for `datasette-insert-api` ([issue 8](https://github.com/simonw/datasette-insert-api/issues/8)) I decided I wanted to register a Datasette pluggy plugin for the duration of a single test.
4   
5   Here's the pattern I figured out for doing that:
8   from datasette import hookimpl
9   from datasette.plugins import pm
10  import pytest
11  
12  
13  def test_using_test_plugin():
14      class TestPlugin:
15          __name__ = "TestPlugin"
22      pm.register(TestPlugin(), name="undo")
23      try:
24          # Rest of test goes here
25      finally:
26          pm.unregister(name="undo")
27  ```
28  
29  Here's [an example](https://github.com/simonw/datasette-insert/blob/7f4c2b3954190d547619d043bbe714481b10ac1e/tests/test_insert_api.py) of a test that uses a pytest fixture to register (and de-register) a plugin:
30  
31  ```python
33  from datasette.app import Datasette
34  from datasette.plugins import pm
35  import pytest
36  
37  
38  @pytest.fixture
39  def unsafe():
40      class UnsafeInsertAll:
51  
52  
53  @pytest.mark.asyncio
54  async def test_insert_alter(ds, unsafe):
55      async with httpx.AsyncClient(app=ds.app()) as client:
56          response = await client.post(

til/pytest/pytest-subprocess.md

1   # Mocking subprocess with pytest-subprocess
2   
3   For [apple-notes-to-sqlite](https://github.com/dogsheep/apple-notes-to-sqlite) I needed to write some tests that simulated executing the `osascript` command using the Python `subprocess` module.
4   
5   I wanted my tests to run on Linux CI machines, where that command would not exist.
6   
7   After failing to use `unittest.mock.patch` to solve this, I went looking for alternatives. I found [pytest-subprocess](https://pypi.org/project/pytest-subprocess/).
8   
9   Here's the relevant section of [the test I wrote](https://github.com/dogsheep/apple-notes-to-sqlite/blob/0.1/tests/test_apple_notes_to_sqlite.py):
10  
11  ```python
17  """
18  
19  def test_apple_notes_to_sqlite_dump(fp):
20      fp.register_subprocess(["osascript", "-e", COUNT_SCRIPT], stdout=b"2")
21      fp.register_subprocess(["osascript", "-e", fp.any()], stdout=FAKE_OUTPUT)
25          # ...
26  ```
27  `fp` is the fixture provided by the package (you need to `pip install pytest-subprocess` for this to work).
28  
29  `COUNT_SCRIPT` here is the first of my `osascript` constants. It looks like this (in `cli.py`):
51  I eventually figured that using `fp.any()` was easier than specifying the exact script. This is a wildcard value which matches any string. It returns the full `FAKE_OUTPUT` variable as the simulated standard out.
52  
53  What's useful about `pytest-subprocess` is that it works for both `subprocess.check_output()` and more complex `subprocess.Popen()` calls - both of which I was using in this script.

til/pytest/pytest-recording-vcr.md

1   # Using VCR and pytest with pytest-recording
2   
3   [pytest-recording](https://github.com/kiwicom/pytest-recording) is a neat pytest plugin that makes it easy to use the [VCR library](https://vcrpy.readthedocs.io/), which helps write tests against HTTP resources by automatically capturing responses and baking them into a YAML file to be replayed during the tests.
4   
5   It even works with [boto3](https://aws.amazon.com/sdk-for-python/)!
6   
7   To use it, first install it with `pip install pytest-recording` and then add the `@pytest.mark.vcr` decorator to a test that makes HTTP calls:
8   
9   ```python
10  @pytest.mark.vcr
11  def test_create():
12      runner = CliRunner()
13      with runner.isolated_filesystem():
14          result = runner.invoke(cli, ["create", "pytest-bucket-simonw-1", "-c"])
15          assert result.exit_code == 0
16  ```
17  
18  The first time you run the tests, use the `--record-mode=once` option:
19  
20      pytest -k test_create --record-mode=once
21  
22  This defaults to creating a YAML file in `tests/cassettes/test_s3_credentials/test_create.yaml`.
23  
24  Subsequent runs of `pytest -k test_create` will reuse those recorded HTTP requests and will not make any network requests - I confirmed this by turning off my laptop's WiFi.

til/pytest/pytest-mock-calls.md

1   # Quick and dirty mock testing with mock_calls
2   
3   I needed to write a test that checked for a really complex sequence of mock calls for [s3-credentials#3](https://github.com/simonw/s3-credentials/issues/3).
4   
5   I ended up using the following trick, using [pytest-mock](https://pypi.org/project/pytest-mock/):
6   
7   ```python
8   def test_create(mocker):
9       boto3 = mocker.patch("boto3.client")
10      runner = CliRunner()
11      with runner.isolated_filesystem():
12          result = runner.invoke(cli, ["create", "pytest-bucket-simonw-1", "-c"])
13          assert [str(c) for c in boto3.mock_calls] == [
14              "call('s3')",
15              "call('iam')",
16              "call().head_bucket(Bucket='pytest-bucket-simonw-1')",
17              "call().get_user(UserName='s3.read-write.pytest-bucket-simonw-1')",
18              'call().put_user_policy(PolicyDocument=\'{"Version": "2012-10-17", "Statement": [{"Sid": "ListObjectsInBucket", "Effect": "Allow", "Action": ["s3:ListBucket"], "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1"]}, {"Sid": "AllObjectActions", "Effect": "Allow", "Action": "s3:*Object", "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1/*"]}]}\', PolicyName=\'s3.read-write.pytest-bucket-simonw-1\', UserName=\'s3.read-write.pytest-bucket-simonw-1\')',
19              "call().create_access_key(UserName='s3.read-write.pytest-bucket-simonw-1')",
20              "call().create_access_key().__getitem__('AccessKey')",
21              "call().create_access_key().__getitem__().__str__()",
22          ]
23  ```
24  I used the trick I describe in [How to cheat at unit tests with pytest and Black](https://simonwillison.net/2020/Feb/11/cheating-at-unit-tests-pytest-black/) where I run that comparison against an empty `[]` list, then use `pytest --pdb` to drop into a debugger and copy and paste the output of `[str(c) for c in boto3.mock_calls]` into my test code.
25  
26  Initially I used a comparison directly against `boto3.mock_calls` - but this threw a surprising error. The calls sequence I baked into my tests looked like this:
27  
28  ```python
29  from unittest.mock import call
30  
31  # ...
34              call("s3"),
35              call("iam"),
36              call().head_bucket(Bucket="pytest-bucket-simonw-1"),
37              call().get_user(UserName="s3.read-write.pytest-bucket-simonw-1"),
38              call().put_user_policy(
39                  PolicyDocument='{"Version": "2012-10-17", "Statement": [{"Sid": "ListObjectsInBucket", "Effect": "Allow", "Action": ["s3:ListBucket"], "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1"]}, {"Sid": "AllObjectActions", "Effect": "Allow", "Action": "s3:*Object", "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1/*"]}]}',
40                  PolicyName="s3.read-write.pytest-bucket-simonw-1",
41                  UserName="s3.read-write.pytest-bucket-simonw-1",
42              ),
43              call().create_access_key(UserName="s3.read-write.pytest-bucket-simonw-1"),
44              call().create_access_key().__getitem__("AccessKey"),
45              call().create_access_key().__getitem__().__str__(),
46          ]
47  ```
48  But when I ran `pytest` that last one failed:
49  ```
50  E             -  'call().create_access_key().__getitem__()',
53  E             ?                                          ^^^^^^^^^^
54  ```
55  It turns out `__str__()` calls do not play well with the `call()` constructor - see [this StackOverflow question](https://stackoverflow.com/questions/61926147/how-to-represent-unittest-mock-call-str).
56  
57  My solution was to cast them all to `str()` using a list comprehension, which ended up fixing that problem.
59  ## Gotcha: parameter ordering
60  
61  There's one major flaw to the `str()` trick I'm using here: the order in which parameters are displayed in the string representation of `call()` may differ between Python versions. I had to undo this trick in one place I was using it ([see here](https://github.com/simonw/s3-credentials/issues/8)) due to the following test failure:
62  
63  ```

til/pytest/pytest-code-coverage.md

1   # Code coverage using pytest and codecov.io
2   
3   I got my [asgi-csrf](https://github.com/simonw/asgi-csrf) Python package up to 100% code coverage. Here's [the pull request](https://github.com/simonw/asgi-csrf/issues/13).
4   
5   I started by installing and using the [pytest-cov](https://pypi.org/project/pytest-cov/) pytest plugin.
6   
7   ```
8   pip install pytest-cov
9   pytest --cov=asgi_csrf
10  ```
11  This shows the current code coverage percentage for the `asgi_csrf` module in the terminal output:
13  collected 18 items                                                                                                                                                   
14  
15  test_asgi_csrf.py ..................                                                                                                                           [100%]
16  
17  ---------- coverage: platform darwin, python 3.7.3-final-0 -----------
23  ========= 18 passed in 0.37s =========
24  ```
25  To generate an HTML report showing which lines are not covered by tests:
26  ```
27  pytest --cov=asgi_csrf --cov-report=html
28  open htmlcov/index.html
29  ```
30  Here's a hosted copy of that report: https://asgi-csrf-htmlcov-ewca4t9se.vercel.app/asgi_csrf_py.html
31  
32  ## Failing the tests if coverage is below a certain threshold
33  
34  The `--cov-fail-under=100` option does this:
35  
36  ```
37  pytest --cov-fail-under=100 --cov asgi_csrf 
38  ======= test session starts =======
39  platform darwin -- Python 3.7.3, pytest-6.0.1, py-1.9.0, pluggy-0.13.1
40  rootdir: /Users/simon/Dropbox/Development/asgi-csrf
41  plugins: cov-2.10.1, asyncio-0.14.0
42  collected 18 items                                                                                                                                                   
43  
44  test_asgi_csrf.py ..................                                                                                                                           [100%]
45  
46  ---------- coverage: platform darwin, python 3.7.3-final-0 -----------
49  asgi_csrf.py     169     13    92%
50  
51  FAIL Required test coverage of 100% not reached. Total coverage: 92.31%
52  ```
53  I added this to my [GitHub test action](https://github.com/simonw/asgi-csrf/blob/83d2b4f6bb034b746fd3f20f57ebdbaeae007a73/.github/workflows/test.yml#L27-L29):
54  ```yaml
55      - name: Run tests
56        run: |
57          pytest --cov-fail-under=100 --cov asgi_csrf
58  ```
59  ## Pushing results to codecov.io
61  https://codecov.io/ offers free coverage reporting for open source projects. I authorized it against my GitHub account, then enabled it for the `asgi-csrf` project by navigating to https://codecov.io/gh/simonw/asgi-csrf (hacking the URL saves you from having to paginate through all of your repos looking for the right one).
62  
63  codecov.io gives you a token - set that as a GitHub repository secret as `CODECOV_TOKEN` - then add the following to the test action configuration:
64  ```yaml
65      - name: Upload coverage to codecov.io
69          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
70  ```
71  This will upload your coverage report (whether the previous test step failed or succeeded). codecov.io then reports back to pull requests and maintains a dashboard for your project.
72  
73  codecov.io doesn't detect if you use a `main` or `master` branch so I had to switch the default branch in the settings at https://codecov.io/gh/simonw/asgi-csrf/settings

til/pytest/pytest-argparse.md

1   # Writing pytest tests against tools written with argparse
2   
3   I usually build command-line tools using [Click](https://click.palletsprojects.com/) (and my [click-app](https://github.com/simonw/click-app) cookiecutter template), which includes a really nice [set of tools](https://click.palletsprojects.com/en/8.0.x/testing/) for writing tests.
4   
5   Today I decided to try building a tool called [stream-delay](https://github.com/simonw/stream-delay) using [argparse](https://docs.python.org/3/library/argparse.html) from the Python standard library, since it didn't need any other dependencies.
6   
7   The one challenge I had was how to write the tests. I used [pytest](https://pytest.org/) as a test-only dependency.
8   
9   Here's the pattern I came up with, using the [capsys pytest fixture](https://docs.pytest.org/en/6.2.x/capture.html) to capture standard output from my tool.
10  
11  ```python
12  from stream_delay import main
13  import pytest
14  
15  @pytest.mark.parametrize("option", ("-h", "--help"))
16  def test_help(capsys, option):
17      try:
18          main([option])
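    # (hedged sketch of the elided lines - the real test catches SystemExit
    #  and then inspects the captured help output)
    except SystemExit:
        pass
    captured = capsys.readouterr()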
38      # ...
39  ```
40  As you can see, `main()` takes an optional list of arguments. The default for that is `None` which will cause `argparse` to read `sys.argv` - but I can inject arguments to the function from my tests if I need to.
41  
42  I'm catching the `SystemExit` exception because this will be raised by default if you use `-h` or `--help` - but I still want to finish my test execution so I can inspect the captured output.
43  
44  Complete code:
45  
46  - [stream_delay.py](https://github.com/simonw/stream-delay/blob/0.1/stream_delay.py)
47  - [tests/test_stream_delay.py](https://github.com/simonw/stream-delay/blob/0.1/tests/test_stream_delay.py)

til/pytest/playwright-pytest.md

1   # Using pytest and Playwright to test a JavaScript web application
2   
3   I [decided to add](https://github.com/simonw/datasette-lite/issues/35) automated tests to my [Datasette Lite](https://simonwillison.net/2022/May/4/datasette-lite/) project. Datasette Lite bundles my Datasette Python web application as a client-side application running inside WebAssembly using Pyodide.
4   
5   I wrote the tests using [playwright-pytest](https://github.com/microsoft/playwright-pytest), which lets you write tests in Python using Microsoft's [Playwright](https://playwright.dev/) browser automation library.
6   
7   ## Installing playwright-pytest
8   
9   Two steps:
10  
11      pip install playwright-pytest
12  
13  Then a second step to install the browsers used by Playwright itself:
15      playwright install
16  
17  I had those browsers installed already, but I still needed to run that command since the updated version of `playwright-pytest` needed more recent browser versions.
18  
19  (I had limited internet while doing this, and discovered that you can trick Playwright into using an older browser version by renaming a folder in `~/Library/Caches/ms-playwright` to the one that shows up in the error message that says that the browsers cannot be found.)
20  
21  ## An initial test
22  
23  The first test I wrote looked like this, saved in `test_app.py`:
24  
25  ```python
26  from playwright.sync_api import Page, expect
27  
28  def test_initial_load(page: Page):
29      page.goto("https://lite.datasette.io/")
30      loading = page.locator("#loading-indicator")
38  ```
39  
40  Then run the test by running this in the same directory as that file:
41  
42      pytest
43  
44  `playwright-pytest` provides the `page` fixture - annotating it with `: Page` is optional but if you do that then VS Code knows what it is and can provide autocomplete in the editor.
45  
46  `page.goto()` causes the browser to navigate to that URL.
53  expect(loading).to_have_css("display", "block")
54  ```
55  The `expect()` helper function encapsulates the concept of polling the page to wait for something to become true within a time limit. This is the key technique for avoiding "flaky" tests when working with Playwright.
56  
57  
58  The assertions [are listed here](https://playwright.dev/python/docs/test-assertions).
59  
60  You don't actually need to use `expect()` though - that's useful if you don't know how long it will take for the page to load, but if you know the page is already loaded you can write assertions like this instead:
67  ```
68  
69  ## pytest options
70  
71  The `playwright-pytest` package adds a bunch of new options to `pytest`. The most useful is `--headed`:
72  
73      pytest --headed
74  
75  This runs the tests in "headed" mode - which means a visible browser window pops up during the tests so you can see what is happening.
76  
77  `--browser firefox` runs them using Firefox instead of Chromium.
78  
79  `--tracing on` is really interesting: it generates a trace ZIP file which you can then open using https://trace.playwright.dev/ to explore a detailed trace of the test as it executed.
80  
81  `--video on` records a video (as a `.webm` file) of the test. I've not tried it yet, but `--video retain-on-failure` only keeps that video if the test fails.
82  
83  Here's [documentation on all of the options](https://playwright.dev/python/docs/test-runners).
84  
85  ## Running a localhost static server during the tests
86  
87  I wanted to run the tests against the most recent version of my code, which consists of an `index.html` file and a `webworker.js` file. Because these use web workers they need to be run from an actual localhost web server, so I needed to start one at the beginning of the tests and shut it down at the end.
88  
89  I wrote about my solution for this in another TIL: [Start a server in a subprocess during a pytest session](https://til.simonwillison.net/pytest/subprocess-server).
90  
91  ## My test suite so far
92  
93  Here's where [I've got to so far](https://github.com/simonw/datasette-lite/blob/daba69708c7a72adad20dce3c534b9a399ef11c8/tests/test_datasette_lite.py):
94  
95  ```python
97  from subprocess import Popen, PIPE
98  import pathlib
99  import pytest
100 import time
101 from http.client import HTTPConnection
104 
105 
106 @pytest.fixture(scope="module")
107 def static_server():
108     process = Popen(
129 
130 
131 @pytest.fixture(scope="module")
132 def dslite(static_server, browser: Browser) -> Page:
133     page = browser.new_page()
140 
141 
142 def test_initial_load(dslite: Page):
143     expect(dslite.locator("#loading-indicator")).to_have_css("display", "none")
144 
145 
146 def test_has_two_databases(dslite: Page):
147     assert [el.inner_text() for el in dslite.query_selector_all("h2")] == [
148         "fixtures",
151 
152 
153 def test_navigate_to_database(dslite: Page):
154     h2 = dslite.query_selector("h2")
155     assert h2.inner_text() == "fixtures"
174 ## Running it in GitHub Actions
175 
176 Here's the [GitHub Actions workflow](https://github.com/simonw/datasette-lite/blob/main/.github/workflows/test.yml) I'm using to run the tests:
177 
178 ```yaml
187 
188 jobs:
189   test:
190     runs-on: ubuntu-latest
191     steps:
192     - uses: actions/checkout@v3
206         pip install -r dev-requirements.txt
207         playwright install
208     - name: Run test
209       run: |
210         pytest
211 ```
212 [dev-requirements.txt](https://raw.githubusercontent.com/simonw/datasette-lite/main/dev-requirements.txt) contains this:
213 ```
214 pytest-playwright==0.3.0
215 playwright==1.24.0
216 ```

til/pytest/only-run-integration.md

1   # Opt-in integration tests with pytest --integration
2   
3   For both [s3-credentials](https://github.com/simonw/s3-credentials) and [datasette-publish-fly](https://github.com/simonw/datasette-publish-fly) I have a need for real-world integration tests that actually interact with the underlying APIs (AWS or Fly) to create and destroy resources on those platforms.
4   
5   Most of the time I want my tests to run without doing these. I want the option to run `pytest --integration` to opt-in to running those extra integration tests.
6   
7   Here's the pattern I'm using. First, in `tests/conftest.py`:
8   
9   ```python
10  import pytest
11  
12  
13  def pytest_addoption(parser):
14      parser.addoption(
15          "--integration",
16          action="store_true",
17          default=False,
18          help="run integration tests",
19      )
20  
21  
22  def pytest_configure(config):
23      config.addinivalue_line(
24          "markers",
25          "integration: mark test as integration test, only run with --integration",
26      )
27  
28  
29  def pytest_collection_modifyitems(config, items):
30      if config.getoption("--integration"):
31          # Also run integration tests
32          return
33      skip_integration = pytest.mark.skip(reason="use --integration option to run")
34      for item in items:
35          if "integration" in item.keywords:
36              item.add_marker(skip_integration)
37  ```
38  This implements a `@pytest.mark.integration` marker which I can use to mark any test that should be considered part of the integration test suite. These will be skipped by default... but will not be skipped if the `--integration` option is passed to `pytest`.
39  
40  Then in the tests I can either do this:
41  
42  ```python
43  @pytest.mark.integration
44  def test_integration_s3():
45      # ...
46  ```
47  Or if I have a module that only contains integration tests - `tests/test_integration.py` - I can use the following line to apply that decorator to every test in the module:
48  ```python
49  import pytest
50  
51  pytestmark = pytest.mark.integration
52  
53  def test_integration_s3():
54      # ...
55  ```

til/pytest/mocking-boto.md

1   # Mocking a Textract LimitExceededException with boto
2   
3   For [s3-ocr issue #21](https://github.com/simonw/s3-ocr/issues/21) I needed to write a test that simulates what happens when Amazon Textract returns a "LimitExceededException". When using boto this error presents itself as an exception:
4   
5   > `botocore.errorfactory.LimitExceededException: An error occurred (LimitExceededException) when calling the StartDocumentTextDetection operation: Open jobs exceed maximum concurrent job limit`
6   
7   I use [moto](https://github.com/spulec/moto) to simulate AWS in that test suite, but moto does not yet have a mechanism for simulating Textract errors like this one.
8   
9   I ended up turning to Python mocks, provided here by the [pytest-mock](https://pypi.org/project/pytest-mock/) fixture. Here's the test I came up with:
10  
11  ```python
12  def test_limit_exceeded_automatic_retry(s3, mocker):
13      mocked = mocker.patch("s3_ocr.cli.start_document_text_extraction")
14      # It's going to fail the first time, then succeed
33  ```python
34  def start_document_text_extraction(textract, **kwargs):
35      # Wrapper function to make this easier to mock in tests
36      return textract.start_document_text_detection(**kwargs)
37  ```
38  The most confusing thing about working with Python mocks is figuring out the string to use to mock the right piece of code. I like this pattern of refactoring the code under test to make it as simple to mock as possible.
39  
40  The code I am testing here implements automatic retries. As such, I needed the API method I am simulating to fail the first time and then succeed the second time.
41  
42  Originally I had done this with a `side_effect()` function - see below - but then [@szotten on Twitter](https://twitter.com/szotten/status/1556337221258575873) pointed out that you can instead set `mock.side_effect` to a list and it will [cycle through those items in turn](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.Mock.side_effect):
43  
44  ```python
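# Hedged sketch, not the actual test code: an exception instance in the
# side_effect list is raised on the first call, a plain value is returned on the second
from botocore.exceptions import ClientError

mocked.side_effect = [
    ClientError(
        {"Error": {"Code": "LimitExceededException"}},
        "StartDocumentTextDetection",
    ),
    {"JobId": "abc123"},
]
```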

til/pytest/mock-httpx.md

1   # How to mock httpx using pytest-mock
2   
3   I wrote this test to exercise some [httpx](https://pypi.org/project/httpx/) code today, using [pytest-mock](https://pypi.org/project/pytest-mock/).
4   
5   The key was to use `mocker.patch.object(cli, "httpx")` which patches the `httpx` module that was imported by the `cli` module.
6   
7   Here the `mocker` function argument is a fixture that is provided by `pytest-mock`.
8   
9   ```python
10  from conditional_get import cli
11  from click.testing import CliRunner
12  
13  
14  def test_performs_conditional_get(mocker):
15      m = mocker.patch.object(cli, "httpx")
16      m.get.return_value = mocker.Mock()
39          )
40  ```
41  https://github.com/simonw/conditional-get/blob/485fab46f01edd99818b829e99765ed9ce0978b5/tests/test_cli.py
42  
43  ## Mocking a JSON response
45  Here's a mock for a GraphQL POST request that returns JSON:
46  ```python
47  @pytest.fixture
48  def mock_graphql_region(mocker):
49      m = mocker.patch("datasette_publish_fly.httpx")
52      m.post.return_value.json.return_value = {"data": {"nearestRegion": {"code": "sjc"}}}
53  ```
54  https://github.com/simonw/datasette-publish-fly/blob/5253220bded001e94561e215d553f352838e7a1c/tests/test_publish_fly.py#L16-L21
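
The chained `return_value` trick is easier to see in isolation - here's a self-contained illustration using `unittest.mock` directly, with a placeholder GraphQL query string:

```python
# Self-contained illustration of chaining return_value through .post().json()
from unittest.mock import MagicMock

m = MagicMock()
m.post.return_value.json.return_value = {"data": {"nearestRegion": {"code": "sjc"}}}

response = m.post(
    "https://api.fly.io/graphql", json={"query": "{ nearestRegion { code } }"}
)
assert response.json()["data"]["nearestRegion"]["code"] == "sjc"
```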
55  
56  ## Mocking httpx.stream
67  https://stackoverflow.com/a/6112456 helped me figure out the following:
68  ```python
69  def test_performs_conditional_get(mocker):
70      m = mocker.patch.object(cli, "httpx")
71      m.stream.return_value.__enter__.return_value = mocker.Mock()
75      ]
76  ```
77  https://github.com/simonw/conditional-get/blob/80454f972d39e2b418572d7938146830fab98fa6/tests/test_cli.py
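
The reason `__enter__` shows up is that `httpx.stream()` is used as a context manager, so the object the `with` block receives is `.stream.return_value.__enter__.return_value`. A self-contained illustration, with a made-up byte payload:

```python
# httpx.stream() is entered with "with", so the response the block sees is
# the mock's __enter__ return value; the payload here is made up.
from unittest.mock import MagicMock

m = MagicMock()
m.stream.return_value.__enter__.return_value = MagicMock(
    iter_bytes=lambda: [b"hello ", b"world"]
)

with m.stream("GET", "https://example.com/file.db") as response:
    body = b"".join(response.iter_bytes())

assert body == b"hello world"
```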
78  
79  ## Mocking an HTTP error triggered by response.raise_for_status()
81  Calling `response.raise_for_status()` raises an exception if an HTTP error (e.g. a 404 or 500) occurred.
82  
83  Here's how I [mocked that to return an error](https://github.com/simonw/airtable-to-yaml/blob/ebd94b2e29d6f2ec3dc64d161495a759330027e8/tests/test_airtable_to_yaml.py#L43-L56):
84  
85  ```python
86  def test_airtable_to_yaml_error(mocker):
87      m = mocker.patch.object(cli, "httpx")
88      m.get.return_value = mocker.Mock()
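
The pattern boils down to attaching an exception instance as the `side_effect` of `raise_for_status` - a minimal sketch, using a plain `Exception` rather than whatever the real test raises:

```python
# Sketch: any exception instance set as side_effect is raised when
# raise_for_status() is called, which exercises the CLI's error path.
def test_error_response(mocker):
    m = mocker.patch.object(cli, "httpx")
    m.get.return_value = mocker.Mock()
    m.get.return_value.raise_for_status.side_effect = Exception(
        "Client error '404 Not Found'"
    )
    # ...then run the CLI with CliRunner and assert the error is surfaced
```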

til/pytest/coverage-with-context.md

1   # pytest coverage with context
2   
3   [This tweet](https://twitter.com/mariatta/status/1499863816489734146) from \@Mariatta tipped me off to the ability to measure "contexts" when [running coverage](https://coverage.readthedocs.io/en/6.3.2/contexts.html#context-reporting) - as a way to tell which tests exercise which specific lines of code.
4   
5   My [sqlite-utils](https://github.com/simonw/sqlite-utils) project uses `pytest` for the test suite. I decided to figure out how to get this working with [pytest-cov](https://pypi.org/project/pytest-cov/).
6   
7   After some experimentation, this is the recipe that worked for me:
8   
9   ```
10  # In the virtual environment, make sure pytest-cov is installed:
11  % pip install pytest-cov
12  # First, run pytest to calculate coverage of the `sqlite_utils` package, with context
13  % pytest --cov=sqlite_utils --cov-context=test
14  # The .coverage file is actually a SQLite database:
15  % ls -lah .coverage
23  Here's what one of the pages looks like, displaying the context for some lines of code:
24  
25  ![The code has an expandable section which reveals which tests executed each individual line.](https://user-images.githubusercontent.com/9599/156860441-66e35994-653a-4ab7-b690-4d901fc57750.png)
26  
27  ## The .coverage schema
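
One quick way to explore those contexts is to query the `.coverage` file directly - this assumes the SQLite schema used by recent coverage.py releases, where each recorded context is a row in a `context` table:

```python
# Assumes coverage.py's SQLite schema with a "context" table;
# list the tables first, then print every recorded context.
import sqlite3

conn = sqlite3.connect(".coverage")
print([r[0] for r in conn.execute("SELECT name FROM sqlite_master WHERE type = 'table'")])
for (context,) in conn.execute("SELECT context FROM context ORDER BY context"):
    print(context or "(empty context)")
```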

til/pytest/async-fixtures.md

1   # Async fixtures with pytest-asyncio
2   
3   I wanted to use a fixture with `pytest-asyncio` that was itself an `async def` function, so that it could execute `await` statements.
4   
5   Since I'm using a `pytest.ini` file containing `asyncio_mode = strict`, I had to use the `@pytest_asyncio.fixture` decorator to get this to work. Without that decorator I got this error:
6   
7   ```
9   E   AssertionError: assert False
10  E    +  where False = _has_explicit_asyncio_mark(<function ds_with_route at 0x11332d2d0>)
11  E    +    where <function ds_with_route at 0x11332d2d0> = <FixtureDef argname='ds_with_route' scope='function' baseid='tests/test_routes.py'>.func
12  ```
13  
14  Swapping `@pytest.fixture` for `@pytest_asyncio.fixture` fixed this problem:
15  
16  ```python
17  import pytest_asyncio
18  
19  @pytest_asyncio.fixture
20  async def ds_with_route():
21      ds = Datasette()
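
Here's a self-contained version of the same shape, with a stand-in class instead of `Datasette` so it runs on its own (and with `asyncio_mode = strict`, the test itself also needs the `@pytest.mark.asyncio` marker):

```python
# Self-contained sketch: FakeApp stands in for Datasette here.
import pytest
import pytest_asyncio


class FakeApp:
    async def ready(self):
        return True


@pytest_asyncio.fixture
async def app():
    app = FakeApp()
    await app.ready()  # the fixture itself can await things
    return app


@pytest.mark.asyncio
async def test_app_is_ready(app):
    assert await app.ready()
```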

til/pytest/assert-dictionary-subset.md

1   # Asserting a dictionary is a subset of another dictionary
2   
3   My [lazy approach to writing unit tests](https://simonwillison.net/2020/Feb/11/cheating-at-unit-tests-pytest-black/) means that sometimes I want to run an assertion against most (but not all) of a dictionary.
4   
5   Take for example an API endpoint that returns something like this:
12  }
13  ```
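
One way to express that kind of assertion is with dictionary `.items()` views, which support set-style subset comparison - the response values below are made up for illustration:

```python
# .items() views support subset comparison, so "most but not all" of a
# dictionary can be checked in a single assert; values here are made up.
actual = {
    "id": 1,
    "name": "Cleo",
    "created": "2024-01-01T00:00:00",
}
expected_subset = {"id": 1, "name": "Cleo"}

assert expected_subset.items() <= actual.items()
```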