ripgrep
yaml-to-sqlite/tests/test_cli.py
1 from click.testing import CliRunner
2 from yaml_to_sqlite import cli
3 import sqlite_utils
29
30
31 def test_without_pk(tmpdir):
32 db_path = tmpdir / "db.db"
33 assert (
44
45
46 def test_with_pk(tmpdir):
47 db_path = tmpdir / "db.db"
48 assert (
59
60
61 def test_single_column(tmpdir):
62 db_path = tmpdir / "db.db"
63 test_yaml = "- One\n" "- Two\n" "- Three\n"
64 assert (
65 0
68 cli.cli,
69 [str(db_path), "numbers", "-", "--single-column", "name"],
70 input=test_yaml,
71 )
72 .exit_code
78
79
80 def test_alters_if_necessary(tmpdir):
81 db_path = tmpdir / "db.db"
82 assert (
yaml-to-sqlite/setup.py
24 packages=find_packages(),
25 install_requires=["click", "PyYAML", "sqlite-utils>=3.9.1"],
26 setup_requires=["pytest-runner"],
27 extras_require={"test": ["pytest"]},
28 entry_points="""
29 [console_scripts]
30 yaml-to-sqlite=yaml_to_sqlite.cli:cli
31 """,
32 tests_require=["yaml-to-sqlite[test]"],
33 url="https://github.com/simonw/yaml-to-sqlite",
34 classifiers=[
whosonfirst-datasette/Dockerfile
1 FROM datasetteproject/datasette
# NOTE(review): `apt` is not intended for non-interactive/scripted use; `apt-get update
# && apt-get install -y --no-install-recommends ... && rm -rf /var/lib/apt/lists/*`
# would be the conventional form and keep the image smaller — confirm before changing.
2 RUN apt update && apt install -y wget bzip2
# Bake the demo fixtures database into the image at build time.
3 RUN wget -q https://latest.datasette.io/fixtures.db
# Pre-computed inspect step is deliberately left disabled (commented out);
# the mounted whosonfirst database is inspected at runtime instead.
4 # RUN datasette inspect --inspect-file=/mnt/whosonfirst/inspect.json /fixtures.db /mnt/whosonfirst/whosonfirst-data-latest.db
5 EXPOSE 8001
# Serve the volume-mounted whosonfirst database alongside the baked-in fixtures,
# listening on all interfaces with CORS enabled.
6 CMD ["datasette", "-h", "0.0.0.0", "-p", "8001", "/mnt/whosonfirst/whosonfirst-data-latest.db", "/fixtures.db", "--cors"]
vaccinate-ca-datasette/metadata.yml
5 about_url: https://github.com/simonw/vaccinate-ca-datasette
6 description_html: |-
7 <a href="/vaccinateca/locations?_facet=Affiliation&_facet=Latest+report+yes%3F&Latest+report+yes%3F=1&_facet_array=Availability+Info">Latest places that reported yes, on a map</a>
twitter-to-sqlite/setup.py
37 "python-dateutil",
38 ],
39 extras_require={"test": ["pytest"]},
40 tests_require=["twitter-to-sqlite[test]"],
41 )
twitter-to-sqlite/tests/test_save_tweets.py
2 import pathlib
3
4 import pytest
5 import sqlite_utils
6 from twitter_to_sqlite import utils
7
8
9 @pytest.fixture
10 def tweets():
11 return json.load(open(pathlib.Path(__file__).parent / "tweets.json"))
12
13
14 @pytest.fixture
15 def db(tweets):
16 db = sqlite_utils.Database(memory=True)
19
20
21 def test_tables(db):
22 assert {
23 "sources",
52
53
54 def test_users(db):
55 user_rows = list(db["users"].rows)
56 assert [
184
185
186 def test_tweets(db):
187 tweet_rows = list(db["tweets"].rows)
188 assert [
315
316
317 def test_sources(db):
318 source_rows = list(db["sources"].rows)
319 assert [
341
342
343 def test_places(db):
344 place_rows = list(db["places"].rows)
345 assert [
359
360
361 def test_media(db):
362 media_rows = list(db["media"].rows)
363 media_tweets_rows = list(db["media_tweets"].rows)
twitter-to-sqlite/tests/test_migrations.py
1 import sqlite_utils
2 from click.testing import CliRunner
3 import sqlite_utils
4 from twitter_to_sqlite import cli, migrations
5
6 from .test_import import zip_contents_path
7 from .test_save_tweets import db, tweets
8
9
10 def test_no_migrations_on_first_run(tmpdir, zip_contents_path):
11 output = str(tmpdir / "output.db")
12 args = ["import", output, str(zip_contents_path / "follower.js")]
21
22
23 def test_convert_source_column():
24 db = sqlite_utils.Database(memory=True)
25 db["tweets"].insert_all(
51
52
53 def test_convert_source_column_against_real_database(db):
54 assert "migrations" not in db.table_names()
55 migrations.convert_source_column(db)
twitter-to-sqlite/tests/test_import.py
2 import pathlib
3
4 import pytest
5 import sqlite_utils
6 from click.testing import CliRunner
7 from twitter_to_sqlite import cli
8
10
11
12 @pytest.fixture
13 def zip_contents_path():
14 return pathlib.Path(__file__).parent / "zip_contents"
15
16
17 @pytest.fixture
18 def import_test_zip(tmpdir, zip_contents_path):
19 archive = str(tmpdir / "archive.zip")
20 buf = io.BytesIO()
25
26
27 def test_create_zip(zip_contents_path):
28 zf = create_zip(zip_contents_path)
29 assert {
37
38
39 def test_cli_import_zip_file(import_test_zip):
40 tmpdir, archive = import_test_zip
41 output = str(tmpdir / "output.db")
42 result = CliRunner().invoke(cli.cli, ["import", output, archive])
46
47
48 def test_cli_import_folder(tmpdir, zip_contents_path):
49 output = str(tmpdir / "output.db")
50 result = CliRunner().invoke(cli.cli, ["import", output, str(zip_contents_path)])
54
55
56 def test_cli_import_specific_files(tmpdir, zip_contents_path):
57 output = str(tmpdir / "output.db")
58 result = CliRunner().invoke(
109
110
111 def test_deletes_existing_archive_tables(import_test_zip):
112 tmpdir, archive = import_test_zip
113 output = str(tmpdir / "output.db")
114 db = sqlite_utils.Database(output)
todomvc-datasette/js/template.js
20
21 var escape = function (string) {
22 return (string && reHasUnescapedHtml.test(string))
23 ? string.replace(reUnescapedHtml, escapeHtmlChar)
24 : string;
todomvc-datasette/js/store.js
1 /*jshint eqeqeq:false */
2 // Signed token for the todomvc user on https://latest.datasette.io/
3 // See https://github.com/simonw/todomvc-datasette/issues/2
4 let TOKEN = 'dstok_eyJhIjoidG9kb212YyIsInRva2VuIjoiZHN0b2siLCJ0IjoxNjY5OTQ0NjgzfQ.wzSKl10MouQaKZ0FuTKEftNJko4';
13 let self = this;
14 self._dbName = `todo_${name}`;
15 fetch("https://latest.datasette.io/ephemeral/-/create", {
16 method: "POST",
17 mode: "cors",
48 }
49 fetch(
50 `https://latest.datasette.io/ephemeral/${
51 this._dbName
52 }.json?_shape=array&_sort_desc=id&completed=${
76 callback = callback || function () {};
77 fetch(
78 `https://latest.datasette.io/ephemeral/${self._dbName}.json?_shape=array&_sort_desc=id`,
79 {
80 mode: "cors",
107 if (id) {
108 fetch(
109 `https://latest.datasette.io/ephemeral/${self._dbName}/${id}/-/update`,
110 {
111 method: "POST",
124 } else {
125 // Save it and store ID
126 fetch(`https://latest.datasette.io/ephemeral/${self._dbName}/-/insert`, {
127 method: "POST",
128 mode: "cors",
151 Store.prototype.remove = function (id, callback) {
152 fetch(
153 `https://latest.datasette.io/ephemeral/${this._dbName}/${id}/-/delete`,
154 {
155 method: "POST",
todomvc-datasette/README.md
1 # TodoMVC demo backed by Datasette
2
3 This is a modified version of the [Vanilla JavaScript TodoMVC Example](https://github.com/tastejs/todomvc/tree/gh-pages/examples/vanillajs) adapted to demonstrate the [Datasette 1.0 alpha JSON API](https://docs.datasette.io/en/latest/changelog.html#a0-2022-11-29).
4
5 To try this out, visit https://todomvc.datasette.io/
til/yaml/yamlfmt.md
9 They suggest this:
10 ```bash
11 go install github.com/google/yamlfmt/cmd/yamlfmt@latest
12 ```
13 This worked on my machine because I had Go installed via Homebrew (`go version go1.20.4 darwin/arm64` according to `go version`).
35 types: [created]
36 jobs:
37 test:
38 runs-on: ubuntu-latest
39 strategy:
40 matrix:
71
72 jobs:
73 test:
74 runs-on: ubuntu-latest
75 strategy:
76 matrix:
til/typescript/basic-tsc.md
22 % ./node_modules/.bin/tsc --init
23
24 Next step: create a `.ts` file to start testing it out. I put the following in `greetings.ts`:
25
26 ```typescript
til/sqlite/unix-timestamp-milliseconds-sqlite.md
16 )
17 ```
18 [Try these both here](https://latest.datasette.io/_memory?sql=select%0D%0A++strftime%28%27%25s%27%2C+%27now%27%29+as+seconds_since_epoch%2C%0D%0A++cast%28%28julianday%28%27now%27%29+-+2440587.5%29+*+86400+*+1000+as+integer%29+as+ms_since_epoch%3B).
19
20 In SQLite 3.42.0 and higher you can do this instead:
30 select strftime('%Y-%m-%d %H:%M:%S', :timestamp_ms / 1000, 'unixepoch')
31 ```
32 The output looks like this: `2023-04-09 05:04:24` - [try that out here](https://latest.datasette.io/_memory?sql=select+strftime%28%27%25Y-%25m-%25d+%25H%3A%25M%3A%25S%27%2C+%3Atimestamp_ms+%2F+1000%2C+%27unixepoch%27%29%0D%0A×tamp_ms=1681016664769).
33
34 ## Why not multiply seconds by 1000?
86 ) as timestamp_ms
87 ```
88 [Try that here](https://latest.datasette.io/_memory?sql=select%0D%0A++(1000+*+(strftime(%27%25s%27%2C+%27now%27)))+%2B+cast(%0D%0A++++substr(%0D%0A++++++strftime(%27%25f%27%2C+%27now%27)%2C%0D%0A++++++instr(strftime(%27%25f%27%2C+%27now%27)%2C+%27.%27)+%2B+1%0D%0A++++)+as+integer%0D%0A++)+as+timestamp_ms).
89
90 The `substr('18.413', instr('18.413', '.') + 1)` part returns just the characters after the first `.` character, which are then cast to integer to get the milliseconds fraction of that second. These are added to `1000 * ` the unix timestamp in seconds.
til/sqlite/track-timestamped-changes-to-a-table.md
75 INSERT INTO foo VALUES ('hello4');
76 ```
77 To test this I ran `sqlite3` (with no arguments, which provides an in-memory database to play with), pasted in the above and then ran this:
78 ```
79 sqlite> .headers on
til/sqlite/text-value-is-integer-or-float.md
17 The `|| '.0'` bit there is needed because `cast('1' as REAL)` returns `1.0`, not just `1`.
18
19 (Note that `1.200` will not pass this test and will be incorrectly considered an invalid floating point representation)
20
21 ## Demos
42 )
43 ```
44 [Try that here](https://latest.datasette.io/fixtures?sql=select%0D%0A++value%2C%0D%0A++cast%28cast%28value+AS+REAL%29+AS+TEXT%29+in+%28value%2C+value+%7C%7C+%27.0%27%29+as+is_valid_float%0D%0Afrom%0D%0A++%28%0D%0A++++select%0D%0A++++++%271%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%271.1%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%27dog%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++null+as+value%0D%0A++%29)
45
46 | value | is_valid_float |
71 )
72 ```
73 [Try that here](https://latest.datasette.io/fixtures?sql=select%0D%0A++value%2C%0D%0A++cast%28cast%28value+AS+INTEGER%29+AS+TEXT%29+%3D+value+as+is_valid_int%0D%0Afrom%0D%0A++%28%0D%0A++++select%0D%0A++++++%271%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%271.1%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%27dog%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++null+as+value%0D%0A++%29)
74
75 | value | is_valid_int |
til/sqlite/subqueries-in-select.md
66 10
67 ```
68 [Try that here](https://latest-with-plugins.datasette.io/github?sql=select%0D%0A++repos.full_name%2C%0D%0A++%28%0D%0A++++select%0D%0A++++++max%28created_at%29%0D%0A++++from%0D%0A++++++releases%0D%0A++++where%0D%0A++++++repo+%3D+repos.id%0D%0A++%29+as+max_created_at%2C%0D%0A++%28%0D%0A++++select%0D%0A++++++count%28*%29%0D%0A++++from%0D%0A++++++releases%0D%0A++++where%0D%0A++++++repo+%3D+repos.id%0D%0A++%29+as+releases_count%2C%0D%0A++%28%0D%0A++++select%0D%0A++++++json_group_array%28%0D%0A++++++++json_object%28%0D%0A++++++++++%27id%27%2C%0D%0A++++++++++id%2C%0D%0A++++++++++%27name%27%2C%0D%0A++++++++++name%2C%0D%0A++++++++++%27created_at%27%2C%0D%0A++++++++++created_at%0D%0A++++++++%29%0D%0A++++++%29%0D%0A++++from%0D%0A++++++%28%0D%0A++++++++select%0D%0A++++++++++*%0D%0A++++++++from%0D%0A++++++++++releases%0D%0A++++++++where%0D%0A++++++++++repo+%3D+repos.id%0D%0A++++++++order+by%0D%0A++++++++++created_at+desc%0D%0A++++++++limit%0D%0A++++++++++3%0D%0A++++++%29%0D%0A++%29+as+recent_releases%0D%0Afrom%0D%0A++repos%0D%0Aorder+by%0D%0A++releases_count+desc%0D%0Alimit%0D%0A++10).
69
70 Here are the first three rows of the output. The `recent_releases` column includes a JSON array with details of the three most recent releases for each of those repositories.
214 limit 10;
215 ```
216 [Try that out here](https://latest-with-plugins.datasette.io/github?sql=with+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max%28releases.created_at%29+over+%28partition+by+repos.id%29+as+max_created_at%2C%0D%0A++++count%28*%29+over+%28partition+by+repos.id%29+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max_created_at%2C%0D%0A++releases_count%2C%0D%0A++json_group_array%28%0D%0A++++json_object%28%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++%29%0D%0A++%29+as+recent_releases%0D%0Afrom+cte%0D%0Awhere+rel_rank+%3C%3D+3%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc%0D%0Alimit+10%3B).
217
218 The key trick here is the `rank() over` line:
221 rank() over (partition by repos.id order by releases.created_at desc) as rel_rank
222 ```
223 This adds an integer called `rel_rank` to each row in that CTE showing the relative ranking of each release, ordered by their created date. [This version of the query](https://latest-with-plugins.datasette.io/github?sql=with+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max%28releases.created_at%29+over+%28partition+by+repos.id%29+as+max_created_at%2C%0D%0A++++count%28*%29+over+%28partition+by+repos.id%29+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect+*+from+cte) shows the contents of the `cte` table, which starts like this:
224
225 | full_name | max_created_at | releases_count | rel_id | rel_name | rel_created_at | rel_rank |
257 left join releases on releases.repo = repos.id
258 ```
259 This almost has the desired effect, but if [you run it](https://latest-with-plugins.datasette.io/github?sql=with+cte+as+(%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max(releases.created_at)+over+(partition+by+repos.id)+as+max_created_at%2C%0D%0A++++count(*)+over+(partition+by+repos.id)+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank()+over+(partition+by+repos.id+order+by+releases.created_at+desc)+as+rel_rank%0D%0A++from+repos%0D%0A++++left+join+releases+on+releases.repo+%3D+repos.id%0D%0A)%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max_created_at%2C%0D%0A++releases_count%2C%0D%0A++json_group_array(%0D%0A++++json_object(%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++)%0D%0A++)+as+recent_releases%0D%0Afrom+cte%0D%0Awhere+rel_rank+%3C%3D+3%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc) you'll notice that it returns rows that look like this:
260
261 | full_name | max_created_at | releases_count | recent_releases |
320 order by releases_count desc
321 ```
322 [Try that finished query here](https://latest-with-plugins.datasette.io/github?sql=%0D%0Awith+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max%28releases.created_at%29+over+%28partition+by+repos.id%29+as+max_created_at%2C%0D%0A++++count%28releases.id%29+over+%28partition+by+repos.id%29+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++left+join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max_created_at%2C%0D%0A++releases_count%2C%0D%0A++json_group_array%28%0D%0A++++json_object%28%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++%29%0D%0A++%29+filter+%28where+rel_id+is+not+null%29+as+recent_releases%0D%0Afrom+cte%0D%0Awhere+rel_rank+%3C%3D+3%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc).
323
324 ## Simplified to use just one window function
353 order by releases_count desc
354 ```
355 [Try that version here](https://latest-with-plugins.datasette.io/github?sql=%0D%0Awith+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++releases.created_at%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++left+join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max%28created_at%29+as+max_created_at%2C%0D%0A++count%28rel_id%29+as+releases_count%2C%0D%0A++json_group_array%28%0D%0A++++json_object%28%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++%29%0D%0A++%29+filter+%28where+rel_id+is+not+null+and+rel_rank+%3C%3D+3%29+as+recent_releases%0D%0Afrom+cte%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc).
356
357 The trick here is to select the `created_at` and `releases.id` columns in the initial CTE, then use `max(created_at) as max_created_at` and `count(rel_id) as releases_count` in the second section of the query, in order to total things up based on the group by.
til/sqlite/steampipe.md
17 Let's start with the Hacker News API plugin, [turbot/steampipe-plugin-hackernews](https://github.com/turbot/steampipe-plugin-hackernews). I like this one because it doesn't require an API key.
18
19 First grab the [latest release](https://github.com/turbot/steampipe-plugin-hackernews/releases/latest) of the extension. I'm on an M2 MacBook so I grabbed the `steampipe_sqlite_hackernews.darwin_arm64.tar.gz` file:
20
21 ```bash
82 Once you've jumped through the security hoops to enable an extension it can be used directly with Datasette as well. Let's try two at once - the Hacker News one and the [crt.sh](https://hub.steampipe.io/plugins/turbot/crtsh) plugin for querying certificate transparency logs.
83
84 Download [the latest steampipe-plugin-crtsh](https://github.com/turbot/steampipe-plugin-crtsh/releases/latest) file - for macOS I used:
85
86 ```bash
til/sqlite/sqlite-tg.md
128 I decided to try implementing a version of that on top of `sqlite-tg`.
129
130 I grabbed the latest release of `timezones.geojson.zip` from [evansiroky/timezone-boundary-builder/](https://github.com/evansiroky/timezone-boundary-builder/releases) and unzipped it to get a `combined.geojson` file that starts like this:
131
132 ```json
til/sqlite/splitting-commas-sqlite.md
25 | x12 | y1234 | z12345 |
26
27 Here's [a live demo of the query](https://latest.datasette.io/fixtures?sql=with+comma_locations+as+%28%0D%0A++select+instr%28%3Apath%2C+%27%2C%27%29+as+first_comma%2C%0D%0A++instr%28%3Apath%2C+%27%2C%27%29+%2B+instr%28substr%28%3Apath%2C+instr%28%3Apath%2C+%27%2C%27%29+%2B+1%29%2C+%27%2C%27%29+as+second_comma%0D%0A%29%2C+variables+as+%28%0D%0A++select%0D%0A++++substr%28%3Apath%2C+0%2C+first_comma%29+as+first%2C%0D%0A++++substr%28%3Apath%2C+first_comma+%2B+1%2C+second_comma+-+first_comma+-+1%29+as+second%2C%0D%0A++++substr%28%3Apath%2C+second_comma+%2B+1%29+as+third%0D%0A++from+comma_locations%0D%0A%29%0D%0Aselect+*+from+variables&path=x12%2Cy1234%2Cz12345).
til/sqlite/simple-recursive-cte.md
10 select * from counter limit 5;
11 ```
12 This query [returns five rows](https://latest.datasette.io/_memory?sql=with+recursive+counter%28x%29+as+%28%0D%0A++select+0%0D%0A++++union%0D%0A++select+x+%2B+1+from+counter%0D%0A%29%0D%0Aselect+*+from+counter+limit+10%3B) from a single column `x` - from 0 to 4.
13
14 | x |
til/sqlite/replicating-rqlite.md
5 By default `rqlite` asks you to use its own custom HTTP API - but I wanted to try running it against Datasette. rqlite author Philip O'Toole confirmed that [this should work](https://twitter.com/general_order24/status/1343619601758908419) provided any writes go through the API - each node can be configured to write to an on-disk database file which Datasette can then read from (the default is to use in-memory databases and an on-disk Raft log).
6
7 Here's how I got that working on my macOS laptop. I used the latest macOS binary from https://github.com/rqlite/rqlite/releases (`rqlite` is written in Go and provides pre-compiled binaries for different systems).
8
9 cd /tmp
til/sqlite/related-content.md
16 ```sql
17 select title, rank from til_fts where til_fts match '
18 i OR wanted OR to OR run OR some OR django OR tests OR using OR
19 pytestdjango OR and OR with OR configured OR pick OR up OR the
20 OR databaseurl OR environment OR variable OR via OR djdatabaseurl
21 OR against OR a OR postgresql OR server OR running OR in OR
25 ```
26
27 Here are the results from [that query](https://til.simonwillison.net/tils?sql=select+title%2C+rank+from+til_fts+where+til_fts+match+%27%0D%0A++i+OR+wanted+OR+to+OR+run+OR+some+OR+django+OR+tests+OR+using+OR%0D%0A++pytestdjango+OR+and+OR+with+OR+configured+OR+pick+OR+up+OR+the%0D%0A++OR+databaseurl+OR+environment+OR+variable+OR+via+OR+djdatabaseurl%0D%0A++OR+against+OR+a+OR+postgresql+OR+server+OR+running+OR+in+OR%0D%0A++github+OR+actions+OR+it+OR+took+OR+while+OR+figure+OR+out+OR%0D%0A++right+OR+pattern+OR+trick+OR+was+OR+define+OR+postgres+OR+service%27%0D%0Aorder+by+rank+limit+5). Unsurprisingly the entry itself shows up first, but the other items look relevant enough to me:
28
29 title | rank
30 -- | --
31 Running tests against PostgreSQL in a service container | -61.04335068286244
32 Talking to a PostgreSQL service container from inside a Docker container | -37.54518907167069
33 Allowing a container in Docker Desktop for Mac to talk to a PostgreSQL server on the host machine | -29.712660785491842
60 5
61 ```
62 And [an example of it running](https://til.simonwillison.net/tils?sql=select%0D%0A++til.topic%2C+til.slug%2C+til.title%2C+til.created%0D%0Afrom%0D%0A++til%0D%0A++join+til_fts+on+til.rowid+%3D+til_fts.rowid%0D%0Awhere%0D%0A++til_fts+match+%3Awords%0D%0A++and+not+(%0D%0A++++til.slug+%3D+%3Aslug%0D%0A++++and+til.topic+%3D+%3Atopic%0D%0A++)%0D%0Aorder+by%0D%0A++til_fts.rank%0D%0Alimit%0D%0A++5&words=i+OR+wanted+OR+to+OR+run+OR+some+OR+django+OR+tests+OR+using+OR+++pytestdjango+OR+and+OR+with+OR+configured+OR+pick+OR+up+OR+the+++OR+databaseurl+OR+environment+OR+variable+OR+via+OR+djdatabaseurl+++OR+against+OR+a+OR+postgresql+OR+server+OR+running+OR+in+OR+++github+OR+actions+OR+it+OR+took+OR+while+OR+figure+OR+out+OR+++right+OR+pattern+OR+trick+OR+was+OR+define+OR+postgres+OR+service&slug=postgresq-service-container&topic=github-actions), which returns the following:
63
64 topic | slug | title | created
til/sqlite/pragma-function-list.md
33 select json_extract('{"foo": "bar", "bar": "baz"}', '$.foo', '$.bar') as bits
34 ```
35 This [returns](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%22foo%22%3A+%22bar%22%2C+%22bar%22%3A+%22baz%22%7D%27%2C+%27%24.foo%27%2C+%27%24.bar%27%29+as+bits) a JSON array corresponding to each argument past the first one:
36
37 ```json
68 pragma_function_list()
69 ```
70 [Run against a Datasette instance](https://latest.datasette.io/_memory?sql=select%0D%0A++name%2C%0D%0A++narg%2C%0D%0A++flags%2C%0D%0A++type%2C%0D%0A++flags+%26+0x000000800+%21%3D+0+as+deterministic%2C%0D%0A++flags+%26+0x000080000+%21%3D+0+as+directonly%2C%0D%0A++flags+%26+0x000100000+%21%3D+0+as+subtype%2C%0D%0A++flags+%26+0x000200000+%21%3D+0+as+innocuous%0D%0Afrom%0D%0A++pragma_function_list%28%29) returns the following data (truncated to the highlights):
71
72 | name | narg | flags | type | deterministic | directonly | subtype | innocuous |
til/sqlite/one-line-csv-operations.md
13 'SELECT passenger_count, COUNT(*), AVG(total_amount) FROM taxi GROUP BY passenger_count'
14
15 You can get `taxi.csv` by downloading the compressed file from [here](https://github.com/multiprocessio/dsq/blob/43e72ff1d2c871082fed0ae401dd59e2ff9f6cfe/testdata/taxi.csv.7z) and running:
16
17 7z e -aos taxi.csv.7z
til/sqlite/now-argument-stability.md
18 That `strftime()` pattern is [described here](https://stackoverflow.com/questions/17574784/sqlite-current-timestamp-with-milliseconds/56895050#56895050) and [in this TIL](https://til.simonwillison.net/sqlite/track-timestamped-changes-to-a-table), it returns the current Unix timestamp in milliseconds.
19
20 The [result of the above query](https://latest.datasette.io/_memory?sql=select+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29+as+t1%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29) is:
21
22 | t1 |
til/sqlite/list-all-columns-in-a-database.md
3 Here's a devious trick for listing ALL columns in a SQLite database, using a SQL query that generates another SQL query.
4
5 The first query ([demo](https://latest.datasette.io/fixtures?sql=select+group_concat%28%0D%0A++%22select+%27%22+%7C%7C+name+%7C%7C+%22%27+as+table_name%2C+*+from+pragma_table_info%28%27%22+%7C%7C+name+%7C%7C+%22%27%29%22%0D%0A%2C+%27+union+%27%29+%7C%7C+%27+order+by+table_name%2C+cid%27%0D%0A++from+sqlite_master+where+type+%3D+%27table%27%3B)):
6
7 ```sql
11 from sqlite_master where type = 'table';
12 ```
13 This outputs the second query, which will look something like this ([demo](https://latest.datasette.io/fixtures?sql=select+%27simple_primary_key%27+as+table_name%2C+*+from+pragma_table_info%28%27simple_primary_key%27%29+union+select+%27primary_key_multiple_columns%27+as+table_name%2C+*+from+pragma_table_info%28%27primary_key_multiple_columns%27%29+union+select+%27primary_key_multiple_columns_explicit_label%27+as+table_name%2C+*+from+pragma_table_info%28%27primary_key_multiple_columns_explicit_label%27%29+union+select+%27compound_primary_key%27+as+table_name%2C+*+from+pragma_table_info%28%27compound_primary_key%27%29+union+select+%27compound_three_primary_keys%27+as+table_name%2C+*+from+pragma_table_info%28%27compound_three_primary_keys%27%29+union+select+%27foreign_key_references%27+as+table_name%2C+*+from+pragma_table_info%28%27foreign_key_references%27%29+union+select+%27sortable%27+as+table_name%2C+*+from+pragma_table_info%28%27sortable%27%29+union+select+%27no_primary_key%27+as+table_name%2C+*+from+pragma_table_info%28%27no_primary_key%27%29+union+select+%27123_starts_with_digits%27+as+table_name%2C+*+from+pragma_table_info%28%27123_starts_with_digits%27%29+union+select+%27Table+With+Space+In+Name%27+as+table_name%2C+*+from+pragma_table_info%28%27Table+With+Space+In+Name%27%29+union+select+%27table%2Fwith%2Fslashes.csv%27+as+table_name%2C+*+from+pragma_table_info%28%27table%2Fwith%2Fslashes.csv%27%29+union+select+%27complex_foreign_keys%27+as+table_name%2C+*+from+pragma_table_info%28%27complex_foreign_keys%27%29+union+select+%27custom_foreign_key_label%27+as+table_name%2C+*+from+pragma_table_info%28%27custom_foreign_key_label%27%29+union+select+%27units%27+as+table_name%2C+*+from+pragma_table_info%28%27units%27%29+union+select+%27tags%27+as+table_name%2C+*+from+pragma_table_info%28%27tags%27%29+union+select+%27searchable%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable%27%29+union+select+%27searchable_tags%27+as+table_name%2C+*+from+pragma_table_i
nfo%28%27searchable_tags%27%29+union+select+%27searchable_fts%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts%27%29+union+select+%27searchable_fts_content%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts_content%27%29+union+select+%27searchable_fts_segments%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts_segments%27%29+union+select+%27searchable_fts_segdir%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts_segdir%27%29+union+select+%27select%27+as+table_name%2C+*+from+pragma_table_info%28%27select%27%29+union+select+%27infinity%27+as+table_name%2C+*+from+pragma_table_info%28%27infinity%27%29+union+select+%27facet_cities%27+as+table_name%2C+*+from+pragma_table_info%28%27facet_cities%27%29+union+select+%27facetable%27+as+table_name%2C+*+from+pragma_table_info%28%27facetable%27%29+union+select+%27binary_data%27+as+table_name%2C+*+from+pragma_table_info%28%27binary_data%27%29+union+select+%27roadside_attractions%27+as+table_name%2C+*+from+pragma_table_info%28%27roadside_attractions%27%29+union+select+%27attraction_characteristic%27+as+table_name%2C+*+from+pragma_table_info%28%27attraction_characteristic%27%29+union+select+%27roadside_attraction_characteristics%27+as+table_name%2C+*+from+pragma_table_info%28%27roadside_attraction_characteristics%27%29+order+by+table_name%2C+cid)):
14 ```sql
15 select 'simple_primary_key' as table_name, * from pragma_table_info('simple_primary_key') union
156 sqlite_master.name
157 ```
158 [Demo](https://latest.datasette.io/fixtures?sql=select%0D%0A++sqlite_master.name+as+table_name%2C%0D%0A++table_info.*%0D%0Afrom%0D%0A++sqlite_master%0D%0A++join+pragma_table_info%28sqlite_master.name%29+as+table_info%0D%0Aorder+by%0D%0A++sqlite_master.name%2C%0D%0A++table_info.cid).
159
160 This works with the `pragma_table_info` and `pragma_index_list` and `pragma_foreign_key_list` functions too.
178 columnName
179 ```
180 [Demo](https://latest.datasette.io/fixtures?sql=SELECT+m.name+as+tableName%2C+p.name+as+columnName%0D%0A++++++++FROM+sqlite_master+m%0D%0A++++++++LEFT+OUTER+JOIN+pragma_table_info%28%28m.name%29%29+p+ON+m.name+%3C%3E+p.name%0D%0A++++++++WHERE+m.type+IN+%28%27table%27%2C%27view%27%29+AND+m.name+NOT+LIKE+%27sqlite_%25%27%0D%0A++++++++ORDER+BY+tableName%2C+columnName).
til/sqlite/ld-preload.md
44 cd SQLite-cf538e27
45
46 Now we can build the extension. The `CPPFLAGS` are optional but I found I needed them to get the full Datasette test suite to pass later on:
47
48 CPPFLAGS="-DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS3_PARENTHESIS -DSQLITE_ENABLE_RTREE=1" ./configure
142 ```
143
144 ## Running the Datasette tests
145
146 To run Datasette's test suite I needed to install a few extra dependencies:
147
148 ```
154 source venv/bin/activate
155 pip install wheel # So bdist_wheel works in next step
156 pip install -e '.[test]'
157 LD_PRELOAD=/tmp/SQLite-cf538e27/.libs/libsqlite3.so python3 -c
158 ```
til/sqlite/json-extract-path.md
32 }
33 ```
34 - `$.creatures` returns the JSON array ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures))
35 - `$.creatures[0].name` returns `Cleo` ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures%5B0%5D.name))
36 - `$.creatures[1]."weight.lb"` returns `1.6` ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures%5B1%5D.%22weight.lb%22))
37
38 ## \#-1 to access arrays by index from the end
44 But... you can apply a single integer subtraction operation to that `#` - so you can return the name of the last creature in the array using this:
45
46 - `$.creatures[#-1].name` returns `Azi` ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures%5B%23-1%5D.name))
47
48 Here's [the commit](https://sqlite.org/src/info/35ed68a651f) that added that custom SQLite extension in 2019.
54 `$.has\" quotes in it`
55
56 For example ([demo](https://latest.datasette.io/_memory?sql=select+json_extract(%27%7B%0D%0A++++%22has%5C%22+quotes+in+it%22:+%22hello%22%0D%0A%7D%27,+%27$.has%5C%22+quotes+in+it%27)&path=$.has%5C%22+quotes+in+it)):
57
58 ```sql
65 ## Source code
66
67 The latest source code for the JSON module can be found in [ext/misc/json.c](https://www3.sqlite.org/src/file?name=ext/misc/json.c) - in particular the `static JsonNode *jsonLookup(...)` function.
69
70 The unit tests are really useful - those are spread across these six files:
71
72 - [test/json1.test](https://www3.sqlite.org/src/file?name=test/json1.test)
73 - [test/json101.test](https://www3.sqlite.org/src/file?name=test/json101.test)
74 - [test/json102.test](https://www3.sqlite.org/src/file?name=test/json102.test)
75 - [test/json103.test](https://www3.sqlite.org/src/file?name=test/json103.test)
76 - [test/json104.test](https://www3.sqlite.org/src/file?name=test/json104.test)
77 - [test/json105.test](https://www3.sqlite.org/src/file?name=test/json105.test) - this one has the tests for `[#]` syntax.
til/sqlite/function-list.md
14 select * from pragma_function_list()
15 ```
16 [Here's that for latest.datasette.io](https://latest.datasette.io/fixtures?sql=select+*+from+pragma_function_list%28%29) (truncated):
17
18 | name | builtin | type | enc | narg | flags |
30 select "'" || group_concat(name, "', '") || "'" from pragma_function_list()
31 ```
32 Run [against latest.datasette.io](https://latest.datasette.io/fixtures?sql=select+%22%27%22+||+group_concat(name%2C+%22%27%2C+%27%22)+||+%22%27%22+from+pragma_function_list()) this returned the following:
33
34 `'pow', 'group_concat', 'group_concat', 'json_type', 'json_type', 'julianday', 'ntile', 'nullif', 'sqlite_compileoption_get', 'json_valid', 'json_quote', 'json_patch', '->', 'json_array', 'current_timestamp', 'power', 'sqlite_compileoption_used', 'json_remove', 'json_object', 'json_insert', '->>', 'sin', 'sum', 'quote', 'printf', 'likelihood', 'json_replace', 'json_extract', 'last_value', 'rank', 'sign', 'sqrt', 'sinh', 'tan', 'round', 'round', 'rtrim', 'rtrim', 'nth_value', 'tanh', 'random', 'trim', 'trim', 'time', 'radians', 'trunc', 'total', 'substr', 'substr', 'replace', 'upper', 'subtype', 'typeof', 'load_extension', 'load_extension', 'soundex', 'json_group_array', 'avg', 'abs', 'json_group_object', 'json_array_length', 'json_array_length', 'strftime', 'atan', 'asin', 'acos', 'substring', 'substring', 'randomblob', 'unicode', 'percent_rank', 'row_number', 'atanh', 'asinh', 'acosh', 'cos', 'atan2', 'last_insert_rowid', 'sqlite_log', 'unlikely', 'cosh', 'ceil', 'char', 'unixepoch', 'exp', 'count', 'count', 'date', 'ceiling', 'total_changes', 'changes', 'sqlite_version', 'degrees', 'floor', 'coalesce', 'glob', 'zeroblob', 'hex', 'iif', 'sqlite_source_id', 'format', 'datetime', 'cume_dist', 'ln', 'instr', 'json', 'dense_rank', 'log', 'log', 'ifnull', 'current_date', 'current_time', 'lag', 'lag', 'lag', 'mod', 'log2', 'like', 'like', 'max', 'max', 'min', 'min', 'lead', 'lead', 'lead', 'log10', 'lower', 'ltrim', 'ltrim', 'first_value', 'pi', 'length', 'likely', 'json_set', 'escape_fts', 'prepare_connection_args', 'convert_units', 'sleep', 'rtreedepth', 'match', 'snippet', 'fts5_source_id', 'offsets', 'matchinfo', 'matchinfo', 'optimize', 'rtreecheck', 'rtreenode', 'highlight', 'bm25', 'fts3_tokenizer', 'fts3_tokenizer', 'fts5'`
36 ## Comparing via copy-and-paste
37
38 To see the functions that were registered for https://sqlite-extension-examples.fly.dev/ but not for https://latest.datasette.io/ I used the above output to construct the following query:
39
40 `select name from pragma_function_list() where name not in ('pow', 'group_concat', 'group_concat', 'json_type', 'json_type', 'julianday', 'ntile', 'nullif', 'sqlite_compileoption_get', 'json_valid', 'json_quote', 'json_patch', '->', 'json_array', 'current_timestamp', 'power', 'sqlite_compileoption_used', 'json_remove', 'json_object', 'json_insert', '->>', 'sin', 'sum', 'quote', 'printf', 'likelihood', 'json_replace', 'json_extract', 'last_value', 'rank', 'sign', 'sqrt', 'sinh', 'tan', 'round', 'round', 'rtrim', 'rtrim', 'nth_value', 'tanh', 'random', 'trim', 'trim', 'time', 'radians', 'trunc', 'total', 'substr', 'substr', 'replace', 'upper', 'subtype', 'typeof', 'load_extension', 'load_extension', 'soundex', 'json_group_array', 'avg', 'abs', 'json_group_object', 'json_array_length', 'json_array_length', 'strftime', 'atan', 'asin', 'acos', 'substring', 'substring', 'randomblob', 'unicode', 'percent_rank', 'row_number', 'atanh', 'asinh', 'acosh', 'cos', 'atan2', 'last_insert_rowid', 'sqlite_log', 'unlikely', 'cosh', 'ceil', 'char', 'unixepoch', 'exp', 'count', 'count', 'date', 'ceiling', 'total_changes', 'changes', 'sqlite_version', 'degrees', 'floor', 'coalesce', 'glob', 'zeroblob', 'hex', 'iif', 'sqlite_source_id', 'format', 'datetime', 'cume_dist', 'ln', 'instr', 'json', 'dense_rank', 'log', 'log', 'ifnull', 'current_date', 'current_time', 'lag', 'lag', 'lag', 'mod', 'log2', 'like', 'like', 'max', 'max', 'min', 'min', 'lead', 'lead', 'lead', 'log10', 'lower', 'ltrim', 'ltrim', 'first_value', 'pi', 'length', 'likely', 'json_set', 'escape_fts', 'prepare_connection_args', 'convert_units', 'sleep', 'rtreedepth', 'match', 'snippet', 'fts5_source_id', 'offsets', 'matchinfo', 'matchinfo', 'optimize', 'rtreecheck', 'rtreenode', 'highlight', 'bm25', 'fts3_tokenizer', 'fts3_tokenizer', 'fts5')`
41
42 This [returned](https://sqlite-extension-examples.fly.dev/data?sql=select+name+from+pragma_function_list%28%29+where+name+not+in+%28%27pow%27%2C+%27group_concat%27%2C+%27group_concat%27%2C+%27json_type%27%2C+%27json_type%27%2C+%27julianday%27%2C+%27ntile%27%2C+%27nullif%27%2C+%27sqlite_compileoption_get%27%2C+%27json_valid%27%2C+%27json_quote%27%2C+%27json_patch%27%2C+%27-%3E%27%2C+%27json_array%27%2C+%27current_timestamp%27%2C+%27power%27%2C+%27sqlite_compileoption_used%27%2C+%27json_remove%27%2C+%27json_object%27%2C+%27json_insert%27%2C+%27-%3E%3E%27%2C+%27sin%27%2C+%27sum%27%2C+%27quote%27%2C+%27printf%27%2C+%27likelihood%27%2C+%27json_replace%27%2C+%27json_extract%27%2C+%27last_value%27%2C+%27rank%27%2C+%27sign%27%2C+%27sqrt%27%2C+%27sinh%27%2C+%27tan%27%2C+%27round%27%2C+%27round%27%2C+%27rtrim%27%2C+%27rtrim%27%2C+%27nth_value%27%2C+%27tanh%27%2C+%27random%27%2C+%27trim%27%2C+%27trim%27%2C+%27time%27%2C+%27radians%27%2C+%27trunc%27%2C+%27total%27%2C+%27substr%27%2C+%27substr%27%2C+%27replace%27%2C+%27upper%27%2C+%27subtype%27%2C+%27typeof%27%2C+%27load_extension%27%2C+%27load_extension%27%2C+%27soundex%27%2C+%27json_group_array%27%2C+%27avg%27%2C+%27abs%27%2C+%27json_group_object%27%2C+%27json_array_length%27%2C+%27json_array_length%27%2C+%27strftime%27%2C+%27atan%27%2C+%27asin%27%2C+%27acos%27%2C+%27substring%27%2C+%27substring%27%2C+%27randomblob%27%2C+%27unicode%27%2C+%27percent_rank%27%2C+%27row_number%27%2C+%27atanh%27%2C+%27asinh%27%2C+%27acosh%27%2C+%27cos%27%2C+%27atan2%27%2C+%27last_insert_rowid%27%2C+%27sqlite_log%27%2C+%27unlikely%27%2C+%27cosh%27%2C+%27ceil%27%2C+%27char%27%2C+%27unixepoch%27%2C+%27exp%27%2C+%27count%27%2C+%27count%27%2C+%27date%27%2C+%27ceiling%27%2C+%27total_changes%27%2C+%27changes%27%2C+%27sqlite_version%27%2C+%27degrees%27%2C+%27floor%27%2C+%27coalesce%27%2C+%27glob%27%2C+%27zeroblob
%27%2C+%27hex%27%2C+%27iif%27%2C+%27sqlite_source_id%27%2C+%27format%27%2C+%27datetime%27%2C+%27cume_dist%27%2C+%27ln%27%2C+%27instr%27%2C+%27json%27%2C+%27dense_rank%27%2C+%27log%27%2C+%27log%27%2C+%27ifnull%27%2C+%27current_date%27%2C+%27current_time%27%2C+%27lag%27%2C+%27lag%27%2C+%27lag%27%2C+%27mod%27%2C+%27log2%27%2C+%27like%27%2C+%27like%27%2C+%27max%27%2C+%27max%27%2C+%27min%27%2C+%27min%27%2C+%27lead%27%2C+%27lead%27%2C+%27lead%27%2C+%27log10%27%2C+%27lower%27%2C+%27ltrim%27%2C+%27ltrim%27%2C+%27first_value%27%2C+%27pi%27%2C+%27length%27%2C+%27likely%27%2C+%27json_set%27%2C+%27escape_fts%27%2C+%27prepare_connection_args%27%2C+%27convert_units%27%2C+%27sleep%27%2C+%27rtreedepth%27%2C+%27match%27%2C+%27snippet%27%2C+%27fts5_source_id%27%2C+%27offsets%27%2C+%27matchinfo%27%2C+%27matchinfo%27%2C+%27optimize%27%2C+%27rtreecheck%27%2C+%27rtreenode%27%2C+%27highlight%27%2C+%27bm25%27%2C+%27fts3_tokenizer%27%2C+%27fts3_tokenizer%27%2C+%27fts5%27%29) the following list:
43
44 ```
86 from pragma_function_list()
87 ```
88 [Try that against latest.datasette.io](https://latest.datasette.io/_memory?sql=select+json_group_array%28distinct+name%29%0D%0Afrom+pragma_function_list%28%29)
89
90 Output:
til/sqlite/cte-values.md
11 select * from x
12 ```
13 [Try that here](https://latest.datasette.io/fixtures?sql=with+x%28c1%2C+c2%2C+c3%29+as+%28%0D%0A++values%0D%0A++++%28%27a%27%2C+%27b%27%2C+3%29%2C%0D%0A++++%28%27b%27%2C+%27c%27%2C+4%29%0D%0A%29%0D%0Aselect+*+from+x).
14
15 The output of this query is:
til/sqlite/cr-sqlite-macos.md
14
15 1. Install a SQLite version that isn't the macOS default, which doesn't allow loading extensions. I used `brew install sqlite3` for this.
16 2. Download the [latest release](https://github.com/vlcn-io/cr-sqlite/releases) - at time of writing for my M2 Mac that's [v0.15.1/crsqlite-darwin-aarch64.dylib](https://github.com/vlcn-io/cr-sqlite/releases/download/v0.15.1/crsqlite-darwin-aarch64.dylib).
17 3. `cd` to the same folder as the download, then run:
18 ```bash
til/sqlite/column-combinations.md
32 num_rows desc
33 ```
34 [Try that here](https://latest.datasette.io/fixtures?sql=select%0D%0A++++case+when+%5Bpk%5D+is+not+null+then+%27pk%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bcreated%5D+is+not+null+then+%27created%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bplanet_int%5D+is+not+null+then+%27planet_int%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bon_earth%5D+is+not+null+then+%27on_earth%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bstate%5D+is+not+null+then+%27state%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5B_city_id%5D+is+not+null+then+%27_city_id%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5B_neighborhood%5D+is+not+null+then+%27_neighborhood%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Btags%5D+is+not+null+then+%27tags%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bcomplex_array%5D+is+not+null+then+%27complex_array%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bdistinct_some_null%5D+is+not+null+then+%27distinct_some_null%2C+%27+else+%27%27+end%0D%0A++as+columns%2C%0D%0A++count%28*%29+as+num_rows%0D%0Afrom%0D%0A++%5Bfacetable%5D%0D%0Agroup+by%0D%0A++columns%0D%0Aorder+by%0D%0A++num_rows+desc).
35
36 This has the desired effect: it gives me back all of the combinations of not-null columns in the table, with a count for each one.
53 num_rows desc' as query from pragma_table_info(:table)
54 ```
55 [Try that out](https://latest-with-plugins.datasette.io/fixtures?sql=select+%27select%0D%0A%27+%7C%7C+group_concat%28%27++++case+when+%5B%27+%7C%7C+name+%7C%7C+%27%5D+is+not+null+then+%27+%7C%7C+quote%28name+%7C%7C+%27%2C+%27%29+%7C%7C+%27+else+%27%27%27%27+end%27%2C+%27+%7C%7C%0D%0A%27%29+%7C%7C+%27%0D%0A++as+columns%2C%0D%0A++count%28*%29+as+num_rows%0D%0Afrom%0D%0A++%5B%27+%7C%7C+%3Atable+%7C%7C+%27%5D%0D%0Agroup+by%0D%0A++columns%0D%0Aorder+by%0D%0A++num_rows+desc%27+as+query+from+pragma_table_info%28%3Atable%29&table=facetable) in a demo that includes the [datasette-query-links](https://datasette.io/plugins/datasette-query-links) plugin.
56 This takes `:table` as an input and generates SQL which can be used to generate column-combination counts.
til/sqlite/build-specific-sqlite-pysqlite-macos.md
1 # Building a specific version of SQLite with pysqlite on macOS/Linux
2
3 I wanted the ability to test my Python software against specific version of SQLite on macOS. I found a way to do that using [pysqlite3](https://github.com/coleifer/pysqlite3).
4
5 First, clone the GitHub mirror of SQLite (so I don't have to learn how to use Fossil):
31 python3 setup.py build_static build
32
33 The end result sits in a `pysqlite3` folder in, on my machine, `/tmp/pysqlite3/build/lib.macosx-10.15-x86_64-3.9` - test it like this:
34
35 cd /tmp/pysqlite3/build/lib.macosx-10.15-x86_64-3.9
54 pip install /tmp/pysqlite3/dist/pysqlite3-0.4.6-cp39-cp39-macosx_10_15_x86_64.whl
55
56 This exact same process works on Linux too (tested inside a default GitHub Actions Linux worker).
til/sqlite/blob-literals.md
7 ```
8
9 This was while writing a unit test for `datasette-media` - for [issue #19](https://github.com/simonw/datasette-media/issues/19). I used it in the test [here](https://github.com/simonw/datasette-media/blob/2cf64d949ccb8cd5f34b24aeb41b2a91de14cdd2/tests/test_media.py#L292-L295).
10
11 The SQLite documentation for [Literal values](https://www.sqlite.org/lang_expr.html#literal_values_constants_) explains how to do this:
til/sql/consecutive-groups.md
227 start desc;
228 ```
229 Grouping by `country, grp` returns a single row per group. The `min(created)` and `max(created)` functions can then find the earliest and latest dates in each group.
230
231 I'm using `date()` to turn those `created` timestamps into `YYYY-MM-DD` dates.
til/sphinx/sphinx-autodoc.md
1 # Adding Sphinx autodoc to a project, and configuring Read The Docs to build it
2
3 I built a [new API reference page](https://sqlite-utils.datasette.io/en/latest/reference.html) today for `sqlite-utils`, using the Sphinx [autodoc extension](https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html) to extract docstrings from the code and use them to build a full class reference.
4
5 I've avoided this kind of documentation in the past because I think narrative prose is a *much* better way of providing documentation - but ``sqlite-utils`` already has [detailed narrative prose](https://sqlite-utils.datasette.io/en/stable/python-api.html), so I felt that adding reference documentation powered by docstrings could enhance that project - while also providing better inline document for tools such as Visual Studio Code and Jupyter.
143 ## The end result
144
145 The new page of documentation is now live at [en/latest/reference.html](https://sqlite-utils.datasette.io/en/latest/reference.html). The pull request in which I figured this all out is [sqlite-utils/pull/312](https://github.com/simonw/sqlite-utils/pull/312).
til/sphinx/literalinclude-with-markers.md
1 # literalinclude with markers for showing code in documentation
2
3 I [wanted to include](https://github.com/simonw/datasette/issues/1830) some example Python tests in the Datasette documentation - but since they were tests, I also wanted to execute them as part of my test suite to make sure they worked correctly.
4
5 I solved this with the Sphinx [literalinclude directive](https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-literalinclude).
6
7 Here's what I put in my [docs/testing_plugins.rst#L92-L111](https://github.com/simonw/datasette/blob/0f63cb83ed31753a9bd9ec5cc71de16906767337/docs/testing_plugins.rst#L92-L111) file:
8 ```rst
9 A simple test looks like this:
10
11 .. literalinclude:: ../tests/test_docs.py
12 :language: python
13 :start-after: # -- start test_homepage --
14 :end-before: # -- end test_homepage --
15
16 Or for a JSON API:
17
18 .. literalinclude:: ../tests/test_docs.py
19 :language: python
20 :start-after: # -- start test_actor_is_null --
21 :end-before: # -- end test_actor_is_null --
22
23 To make requests as an authenticated actor, create a signed ``ds_cookie`` using the ``datasette.client.actor_cookie()`` helper function and pass it in ``cookies=`` like this:
24
25 .. literalinclude:: ../tests/test_docs.py
26 :language: python
27 :start-after: # -- start test_signed_cookie_actor --
28 :end-before: # -- end test_signed_cookie_actor --
29 ```
30 Note that the paths like `../tests/test_docs.py` are relative to the root `docs/` folder, which is a sibling of `tests/`.
31
32 Then in [tests/test_docs.py](https://github.com/simonw/datasette/blob/0f63cb83ed31753a9bd9ec5cc71de16906767337/tests/test_docs.py#L109-L141):
33 ```python
34 # fmt: off
35
36 # -- start test_homepage --
37 @pytest.mark.asyncio
38 async def test_homepage():
39 ds = Datasette(memory=True)
40 response = await ds.client.get("/")
41 html = response.text
42 assert "<h1>" in html
43 # -- end test_homepage --
44
45 # -- start test_actor_is_null --
46 @pytest.mark.asyncio
47 async def test_actor_is_null():
48 ds = Datasette(memory=True)
49 response = await ds.client.get("/-/actor.json")
50 assert response.json() == {"actor": None}
51 # -- end test_actor_is_null --
52
53 # -- start test_signed_cookie_actor --
54 @pytest.mark.asyncio
55 async def test_signed_cookie_actor():
56 ds = Datasette(memory=True)
57 cookies = {"ds_actor": ds.client.actor_cookie({"id": "root"})}
58 response = await ds.client.get("/-/actor.json", cookies=cookies)
59 assert response.json() == {"actor": {"id": "root"}}
60 # -- end test_signed_cookie_actor --
61 ```
62 The rendered documentation [can be seen here](https://docs.datasette.io/en/latest/testing_plugins.html#using-datasette-client-in-tests).
63
64 The `# fmt: off` line at the start is an instruction to [Black](https://github.com/psf/black) to ignore that section of the file. Without that, running Black would insist on adding newlines before the closing `# -- end` comments, which would show up [as blank space in the documentation](https://github.com/simonw/datasette/issues/2231).
til/sphinx/blacken-docs.md
23
24 ```python
25 @pytest.fixture
26 def datasette(tmp_path_factory):
27 # This fixture will be executed repeatedly for every test
28 ```
29 This is because of the missing function body. It turns out adding `...` (which looks prettier than `pass`) fixes this issue:
30 ```python
31 @pytest.fixture
32 def datasette(tmp_path_factory):
33 # This fixture will be executed repeatedly for every test
34 ...
35 ```
til/spatialite/minimal-spatialite-database-in-python.md
1 # Creating a minimal SpatiaLite database with Python
2
3 When writing a test for [datasette-leaflet-freedraw](https://github.com/simonw/datasette-leaflet-freedraw) I realized I didn't have a simple tiny recipe for creating an in-memory SpatiaLite database in Python. I came up with this:
4
5 ```python
26 /usr/local/lib/mod_spatialite.dylib
27 ```
28 I also remembered I have this script: [build_small_spatialite_db.py](https://github.com/simonw/datasette/blob/main/tests/build_small_spatialite_db.py)
til/spatialite/knn.md
1 # KNN queries with SpatiaLite
2
3 The latest version of SpatiaLite adds KNN support, which makes it easy to efficiently answer the question "what are the X closest records to this point".
4
5 The USGS earthquakes GeoJSON is a great dataset for experimenting with these features.
9 `https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.geojson` currently contains 10,642 features.
10
11 To turn that into a SpatiaLite database using the latest version of [geojson-to-sqlite](https://github.com/simonw/geojson-to-sqlite):
12 ```bash
13 curl 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.geojson' | \
til/shot-scraper/axe-core.md
24 ```json
25 {
26 "testEngine": {
27 "name": "axe-core",
28 "version": "4.7.2"
29 },
30 "testRunner": {
31 "name": "axe"
32 },
33 "testEnvironment": {
34 "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/115.0.5790.75 Safari/537.36",
35 "windowWidth": 1280,
til/selenium/selenium-python-macos.md
92 print(firefox.find_element_by_css_selector('body').text)
93 ```
94 I used `wget` for the download (rather than clicking the link in my browser) thanks to the warning here: https://firefox-source-docs.mozilla.org/testing/geckodriver/Notarization.html
95
96 An easier option: install it with Homebrew:
til/readthedocs/stable-docs.md
1 # Updating stable docs in ReadTheDocs without pushing a release
2
3 I use [ReadTheDocs](https://readthedocs.org/) for several of my projects. It's fantastic: among other things, it makes it easy to publish the documentation for my latest `main` branch at `/latest/` and the documentation for my latest release at `/stable/` (as well as maintain archived tag URLs for every prior release).
4
5 I can then configure the main page of my project's documentation to redirect to `/stable/` by default.
30 After some [extensive experimentation](https://github.com/simonw/simonw-readthedocs-experiments/issues/1) (that's an issue thread with 43 comments, all by me) I've found a solution.
31
32 The basic shape is this: rather than having ReadTheDocs serve `/stable/` from the latest tagged release of my project, I instead maintain a `stable` branch in the GitHub repository. It's this branch that becomes the default documentation on my documentation sites.
33
34 Then I use GitHub Actions to automate the process of updating that branch. In particular:
59 jobs:
60 update_stable_docs:
61 runs-on: ubuntu-latest
62 steps:
63 - name: Checkout repository
214 docs/cli.rst
215 sqlite_utils/cli.py
216 tests/test_cli.py
217 ```
218
249 Here's the sequence of steps.
250
251 - Go to the project's admin page, click advanced settings and switch the default version from "stable" to "latest" - then scroll to the very bottom of the page to find the save button. This step is necessary because you can't delete the "stable" version if it is set as the default.
252 - Visit the Versions tab, edit the "stable" version and uncheck the "active" box. This is the same thing as deleting it.
253 - Find the "stable" branch on that page and activate that instead. This will trigger a build of your `stable` branch and cause it to be hosted at `/stable/` on ReadTheDocs.
til/readthedocs/pip-install-docs.md
10 # ...
11 extras_require={
12 "test": ["pytest", "pytest-asyncio", "black", "cogapp", "ruff"],
13 "docs": [
14 "sphinx==7.2.6",
til/readthedocs/link-from-latest-to-stable.md
1 # Linking from /latest/ to /stable/ on Read The Docs
2
3 [Read The Docs](https://readthedocs.org/) has a handy feature where documentation for older versions will automatically link to the latest release, for example [on this page](https://docs.datasette.io/en/0.56/spatialite.html):
4
5 <img width="978" alt="A documentation page with a note that says: You are not reading the most recent version of this documentation. 0.60 is the latest version available." src="https://user-images.githubusercontent.com/9599/150437341-14554fe7-1c47-4462-a1d9-9b8d822aaea8.png">
6
7 That feature is enabled by a "Show version warning" check box in their Advanced Settings preference pane.
9 It's implemented by [this JavaScript](https://github.com/readthedocs/readthedocs.org/blob/0852d7c10d725d954d3e9a93513171baa1116d9f/readthedocs/core/static-src/core/js/doc-embed/version-compare.js#L13-L21) in their default theme, called [from here](https://github.com/readthedocs/readthedocs.org/blob/bc3e147770e5740314a8e8c33fec5d111c850498/readthedocs/core/static-src/core/js/doc-embed/footer.js#L66-L86).
10
11 I had an extra requirement: I wanted pages on my `/en/latest/` documentation (which shows documentation for the in-development `main` branch on GitHub) to link back to the `/en/stable/` equivalent - but only if that page also existed in the stable documentation.
12
13 I ended up [adding this snippet](https://github.com/simonw/datasette/commit/ffca55dfd7cc9b53522c2e5a2fa1ff67c9beadf2) of jQuery JavaScript to my custom `docs/_templates/layout.html` template:
18 <script>
19 jQuery(function ($) {
20 // Show banner linking to /stable/ if this is a /latest/ page
21 if (!/\/latest\//.test(location.pathname)) {
22 return;
23 }
24 var stableUrl = location.pathname.replace("/latest/", "/stable/");
25 // Check it's not a 404
26 fetch(stableUrl, { method: "HEAD" }).then((response) => {
47 {% endblock %}
48 ```
49 The neatest piece of this solution is the way it uses an HTTP `HEAD` request via `fetch()` to confirm that the equivalent stable page exists before adding a link to it:
50 ```javascript
51 var stableUrl = location.pathname.replace("/latest/", "/stable/");
52 // Check it's not a 404
53 fetch(stableUrl, { method: "HEAD" }).then((response) => {
56 ```
57
58 Here's what my fix looks like, running on https://docs.datasette.io/en/latest/csv_export.html
59
60 <img width="978" alt="This page has a banner that says: This documentation covers the development version of Datasette. See this page for the current stable release." src="https://user-images.githubusercontent.com/9599/150438021-0ab3db8f-7f65-4846-b2d4-880e10dce79d.png">
62 ## Alternative solution: sphinx-version-warning
63
64 Just minutes after I committed my fix I was informed of the existence of [sphinx-version-warning](https://sphinx-version-warning.readthedocs.io/en/latest/), a Sphinx plugin that can solve this problem too. There's an example of using that to add a message to the `/latest/` page in [its own documentation configuration here](https://github.com/humitos/sphinx-version-warning/blob/a82156c2ea08e5feab406514d0ccd9d48a345f48/docs/conf.py#L32-L38).
65
66 ```python
67 # -- Version Warning Banner configuration ------------------------------------
68 versionwarning_messages = {
69 'latest': 'This is a custom message only for version "latest" of this documentation.',
70 }
71 versionwarning_admonition_type = 'tip'
75 I decided to stick with my version, mainly because I like the `fetch()` solution I used.
76
77 GitHub issue: [ Documentation should clarify /stable/ vs /latest/ #1608](https://github.com/simonw/datasette/issues/1608)
til/readthedocs/documentation-seo-canonical.md
3 I was thinking about documentation SEO today. Like many projects, Datasette offers multiple versions of the documentation:
4
5 - https://docs.datasette.io/en/latest/ is the latest `main` branch on GitHub
6 - https://docs.datasette.io/en/stable/ is the most recent stable (non alpha or beta) release - currently 0.60
7 - https://docs.datasette.io/en/0.59.4/ is the documentation for that specific version - I have more than 70 of those now
23 <link rel="canonical" href="https://docs.datasette.io/en/stable/introspection.html" />
24 ```
25 Here's [their documentation](https://docs.readthedocs.io/en/latest/custom_domains.html#canonical-urls) covering this feature. I think you need to have configured a "default version" (though they may set a sensible default for that already) - for my project the page for doing that is the Advanced settings page at https://readthedocs.org/dashboard/datasette/advanced/
26
27 This TIL started life as [a Twitter thread](https://twitter.com/simonw/status/1484287724773203971).
til/python/too-many-open-files-psutil.md
1 # Using psutil to investigate "Too many open files"
2
3 I was getting this intermittent error running my Datasette test suite:
4
5 OSError: [Errno 24] Too many open files
7 To figure out what was going on, I used the `psutil` package and its `open_files()` method.
8
9 Here's the documentation for [psutil.Process.open_files](https://psutil.readthedocs.io/en/latest/#psutil.Process.open_files).
10
11 I ran `pip install psutil` in my virtual environment.
12
13 Then I ran `pytest --pdb` to drop into a Python debugger when a test failed.
14
15 In the debugger I ran this:
til/python/struct-endianness.md
7 ```python
8 def decode_matchinfo(buf):
9 # buf is a bytestring of unsigned integers, each 4 bytes long
10 return struct.unpack("I" * (len(buf) // 4), buf)
11 ```
44 ```python
45 def decode_matchinfo(buf):
46 # buf is a bytestring of unsigned integers, each 4 bytes long
47 return struct.unpack("<" + ("I" * (len(buf) // 4)), buf)
48 ```
til/python/stdlib-cli-tools.md
42 Then ran `rg`:
43 ```
44 rg 'if __name__ =' -l | grep -v 'test/' \
45 | grep -v 'tests/' | grep -v idlelib | grep -v turtledemo
46 ```
47 The `-l` option causes `ripgrep` to list matching files without showing the context of the match.
52 >
53 > ```bash
54 > rg 'if __name__ =' -l | grep -v -e 'test/' -e 'tests/' -e idlelib -e turtledemo
55 > ```
56 > `grep -v` still means "everything that doesn't match this" - but then the multiple `-e '...'` patterns are used to construct a "this pattern or this pattern or this pattern" filter. This saves on having to pipe through `grep -v` multiple times. Thanks for the tip, [dfc](https://news.ycombinator.com/item?id=36519308).
85 getopt.py
86 dbm/__init__.py
87 doctest.py
88 pickle.py
89 imaplib.py
290 ### nntplib
291
292 "nntplib built-in demo - display the latest articles in a newsgroup"
293
294 It defaults to `gmane.comp.python.general`:
til/python/setup-py-from-url.md
7 ```python
8 extras_require={
9 "test": ["pytest", "black", "hypothesis", "cogapp"],
10 "docs": ["furo", "sphinx-autobuild", "codespell", "sphinx-copybutton"],
11 "mypy": [
36 ```python
37 extras_require={
38 "test": ["pytest", "black", "hypothesis", "cogapp"],
39 "docs": [
40 "furo",
til/python/rye.md
1 # A few notes on Rye
2
3 [Rye](https://github.com/mitsuhiko/rye) is Armin Ronacher's new experimental Python packaging tool. I decided to take it for a test-run.
4
5 ## Installing Rye
til/python/quick-testing-pyenv.md
1 # Quickly testing code in a different Python version using pyenv
2
3 I had [a bug](https://github.com/simonw/llm/issues/82#issuecomment-1629735729) that was only showing up in CI against Python 3.8.
4
5 I used the following pattern with [pyenv](https://github.com/pyenv/pyenv) to quickly run the tests against that specific version.
6
7 (I had previously installed `pyenv` using `brew install pyenv`.)
58 Then installed my current project into that environment like so:
59 ```bash
60 /tmp/py38env/bin/pip install -e '.[test]'
61 ```
62 Now I can run the tests like this:
63 ```bash
64 /tmp/py38env/bin/pytest
65 ```
til/python/pypy-macos.md
44 sys.exit(cli())
45 ```
46 Then I tested an import against a large CSV file like so:
47 ```
48 (venv) /tmp % time sqlite-utils insert pypy.db t /tmp/en.openfoodfacts.org.products.csv --csv
til/python/pyproject.md
135 ## It finds the Python files for you
136
137 With `setup.py` I'm used to putting quite a bit of effort into telling Python which files should be included in the package - and making sure it doesn't include the `tests/` and `docs/` folder.
138
139 As far as I can tell, the default behaviour now is to find all `*.py` files and all `*/*.py` files and include those - but to exclude common patterns such as `tests/` and `docs/` and `tests.py` and `test_*.py`.
140
141 This behaviour is defined by `setuptools`. The [Automatic Discovery](https://setuptools.pypa.io/en/latest/userguide/package_discovery.html#automatic-discovery) section of the `setuptools` documentation describes these rules in detail.
142
143 ## Adding metadata
203 ## Test dependencies
204
205 I like being able to run `pip install -e '.[test]'` to install test dependencies - things like `pytest`, which are needed to run the project tests but shouldn't be bundled with the project itself when it is installed.
206
207 Those can be added in a section like this:
209 ```toml
210 [project.optional-dependencies]
211 test = ["pytest"]
212 ```
213 I added that to my `/tmp/demo-package/pyproject.toml` file, then ran this in my elsewhere virtual environment:
214 ```bash
215 pip install -e '/tmp/demo-package[test]'
216 ```
217 The result was an installation of `pytest`, visible when I ran `pip freeze`.
218
219 ## Package data
til/python/pip-tools.md
14 psycopg2-binary
15 dj-database-url
16 pytest-django
17 django-extensions
18 django-htmx
21 httpx
22 sentry-sdk
23 pytest-httpx
24 ics==0.7
25 ```
28 > `django.utils.deprecation.RemovedInDjango41Warning: 'django_extensions' defines default_app_config = 'django_extensions.apps.DjangoExtensionsConfig'.`
29
30 I'm pinning `ics` to the exact version 0.7 due to a broken test I experienced with more recent versions.
31
32 ## Compiling requirements.txt from requirements.in
til/python/pdb-interact.md
3 Today [Carlton told me](https://twitter.com/carltongibson/status/1587155176590385159) about the [interact command](https://docs.python.org/3.10/library/pdb.html#pdbcommand-interact) in the Python debugger.
4
5 Here's how to use it with `pytest` (but it works anywhere else where you find yourself in a `pdb` session).
6
7 Use `pytest --pdb` to cause `pytest` to open a debugger at the first failed assertion (I added `assert False` to my test suite to demonstrate this).
8
9 Then type `interact` to drop into a full Python interactive prompt that keeps all of the local and global variables from the debugger:
10
11 ```
12 % pytest -k test_drop --pdb
13 ======== test session starts ========
14 platform darwin -- Python 3.10.3, pytest-7.1.3, pluggy-1.0.0
15 ...
16 > assert False
17 E assert False
18
19 tests/test_api_write.py:272: AssertionError
20 >>>> entering PDB >>>>
21
22 >>>> PDB post_mortem (IO-capturing turned off) >>>>
23 > /Users/simon/Dropbox/Development/datasette/tests/test_api_write.py(272)test_drop_table()
24 -> assert False
25 (Pdb) interact
26 >>> locals().keys()
27 dict_keys(['__name__', '__doc__', '__package__', '__loader__', '__spec__', '__file__', '__cached__', '__builtins__',
28 '@py_builtins', '@pytest_ar', 'Datasette', 'sqlite3', 'pytest', 'time', 'ds_write', 'write_token', 'test_write_row',
29 'test_write_rows', 'test_write_row_errors', 'test_delete_row', 'test_drop_table', 'scenario', 'token', 'should_work',
30 'path', 'response', '@py_assert0', '@py_format2'])
31 ```
til/python/packaging-pyinstaller.md
67 I solved this by adding each `ModuleNotFoundError` module to `--hidden-import` until it worked.
68
69 I've tested this script (and the generated executables) on both macOS and Ubuntu Linux so far, and it's worked perfectly in both cases. See [issue 93](https://github.com/simonw/datasette/issues/93) for more details.
til/python/os-remove-windows.md
1 # os.remove() on Windows fails if the file is already open
2
3 I puzzled over this one for [quite a while](https://github.com/simonw/sqlite-utils/issues/503) this morning. I had this test that was failing with Windows on Python 3.11:
4
5 ```python
6 @pytest.mark.parametrize(
7 "use_path,file_exists", [(True, True), (True, False), (False, True), (False, False)]
8 )
9 def test_recreate(tmpdir, use_path, file_exists):
10 filepath = str(tmpdir / "data.db")
11 if use_path:
17 assert ["t2"] == Database(filepath).table_names()
18 ```
19 The test checks that the `recreate=True` option to my `Database()` constructor deletes and re-creates the file.
20
21 Here's [the implementation](https://github.com/simonw/sqlite-utils/blob/9cbe19ac0547031f3b626d9d18ef05c0d193bf79/sqlite_utils/db.py#L320-L323) of that `recreate=True` option:
28 On Windows I was getting the following exception:
29 ```
30 FAILED tests/test_recreate.py::test_recreate[True-True] -
31 PermissionError: [WinError 32] The process cannot access the file because it is being used by another process:
32 'C:\\Users\\runneradmin\\AppData\\Local\\Temp\\pytest-of-runneradmin\\pytest-0\\test_recreate_True_True_0\\data.db'
33 ```
34 Eventually I spotted the problem: my call on this line was opening a SQLite connection to the `data.db` file:
39 But it wasn't explicitly closing the SQLite connection. It turns out that leaves the database file open - and since the file is still open Windows raised an exception when `os.remove()` was called against it.
40
41 I fixed the error by closing the SQLite3 connection in my test, like this:
42
43 ```python
til/python/locust.md
1 # Simple load testing with Locust
2
3 I've been using [Locust](https://locust.io/) recently to run some load tests - most significantly [these tests](https://github.com/simonw/django_sqlite_benchmark/issues?q=is%3Aissue+is%3Aclosed) against SQLite running with Django and [this test](https://github.com/simonw/datasette-gunicorn/issues/1) exercising Datasette and Gunicorn.
4
5 ## A really basic test
6
7 Locust tests are defined in a `locustfile.py` file. Here's the most basic possible test, which sends requests to the `/` page of a web application:
8
9 ```python
22 locust
23
24 This opens a web server on http://0.0.0.0:8089/ (by default) which offers an interface for starting a new test:
25
26 <img width="484" alt="A form with fields for number of users (peak concurrency), spawn rate (users started/second) and host (the URL to start the tests at)" src="https://user-images.githubusercontent.com/9599/197367313-8b4c85b9-cd0e-45ed-adc2-789116687cb5.png">
27
28 You can run this for as long as you like, and it will produce both statistics on the load test and some pleasing charts:
29
30 <img width="1049" alt="Charts for total requests per second over time and response times in ms" src="https://user-images.githubusercontent.com/9599/197367351-03735a41-00cb-4bbf-a13d-cb22bbaf6e3b.png">
32 ## Using the command-line
33
34 You can also run tests without the web server at all. I tend to use this option as it's quicker to repeat a test, and you can easily copy and paste the results into a GitHub issue thread.
35
36 locust --headless --users 4 --spawn-rate 2 -H http://127.0.0.1:8001
37
38 This runs the tests in the current `locustfile.py` against `http://127.0.0.1:8001`, with four concurrent users and ramping up at 2 users every second (so taking two seconds to ramp up to full concurrency).
39
40 Hit `Ctrl+C` to end the test. It will end up producing something like this:
41
42 ```
55 ```
56
57 ## More complicated tests
58
59 Locust tests can get a lot more complex than this. The documentation [provides this example](https://docs.locust.io/en/stable/writing-a-locustfile.html):
60
61 ```python
til/python/introspect-function-parameters.md
18 ```
19
20 And here's an illustrative unit test:
21
22 ```python
23 def test_call_with_supported_arguments():
24 def foo(a, b):
25 return "{}+{}".format(a, b)
28 assert "1+2" == utils.call_with_supported_arguments(foo, a=1, b=2, c=3)
29
30 with pytest.raises(TypeError):
31 utils.call_with_supported_arguments(foo, a=1)
32 ```
til/python/installing-upgrading-plugins-with-pipx.md
56 ]
57
58 I added all of this to the Datasette docs here: https://docs.datasette.io/en/latest/installation.html#using-pipx
59 (see https://github.com/simonw/datasette/issues/756).
til/python/generate-nested-json-summary.md
1 # Generated a summary of nested JSON data
2
3 I was trying to figure out the shape of the JSON object from https://github.com/simonw/coronavirus-data-gov-archive/blob/master/data_latest.json?raw=true - which is 3.2MB and heavily nested, so it's difficult to get a good feel for the shape.
4
5 I solved this with a Python `summarize()` function which recursively truncates the nested lists and dictionaries.
28 import json, requests
29 data = requests.get(
30 "https://github.com/simonw/coronavirus-data-gov-archive/blob/master/data_latest.json?raw=true"
31 ).json()
32 print(json.dumps(summarize(data, list_limit=2, key_limit=7), indent=4))
til/python/cog-to-update-help-in-readme.md
11 import cog
12 from csvs_to_sqlite import cli
13 from click.testing import CliRunner
14 runner = CliRunner()
15 result = runner.invoke(cli.cli, ["--help"])
37 ## Testing with cog --check
38
39 A version of Cog released after I first wrote this TIL added a new `--check` option, so you can run a test in CI to check if the file needs to be updated using:
40
41 cog --check README.md
42
43 ## Writing a test (before cog --check)
44
45 Any time I generate content like this in a repo I like to include a test that will fail if I forget to update the content.
46
47 `cog` clearly isn't designed to be used as an independent library, but I came up with the following pattern `pytest` test which works well, in my `tests/test_csvs_to_sqlite.py` module:
48
49 ```python
54
55
56 def test_if_cog_needs_to_be_run():
57 _stdout = sys.stdout
58 sys.stdout = StringIO()
77 Cog then writes the generated output to `stdout` - which I capture with that `sys.stdout` trick.
78
79 Finally, I compare the generated output to the current file content and fail the test with a reminder to run `cog -r` if they do not match.
80
81 ## Cog for reStructuredText
til/python/codespell.md
til/python/callable.md
33 `check_callable(obj)` returns a `CallableStatus` named tuple, with an `is_callable` boolean saying if it can be caled and an `is_async_callable` boolean specifying if you need to use `await` with it.
34
35 I wrote these `pytest` tests to exercise the `check_callable()` function:
36
37 ```python
38 import pytest
39
40
61
62
63 @pytest.mark.parametrize(
64 "obj,expected_is_callable,expected_is_async_callable",
65 (
77 ),
78 )
79 def test_check_callable(obj, expected_is_callable, expected_is_async_callable):
80 status = check_callable(obj)
81 assert status.is_callable == expected_is_callable
til/python/call-pip-programatically.md
3 I needed this for the `datasette install` and `datasette uninstall` commands, see [issue #925](https://github.com/simonw/datasette/issues/925).
4
5 My initial attempt at this resulted in weird testing errors ([#928](https://github.com/simonw/datasette/issues/928)) - while investigating them I stumbled across [this comment](https://github.com/pypa/pip/blob/e060970d51c5946beac8447eb95585d83019582d/src/pip/_internal/cli/main.py#L23-L47) in the `pip` source code:
6
7 ```
41 run_module("pip", run_name="__main__")
42 ```
43 And here's how I wrote [a unit test](https://github.com/simonw/datasette/blob/afdeda8216d4d3027f87583ccdbef17ad85022ef/tests/test_cli.py#L114-L124) for it:
44 ```python
45 @mock.patch("datasette.cli.run_module")
46 def test_install(run_module):
47 runner = CliRunner()
48 runner.invoke(cli, ["install", "datasette-mock-plugin", "datasette-mock-plugin2"])
til/pytest/test-click-app-with-streaming-input.md
1 # Testing a Click app with streaming input
2
3 For [sqlite-utils#364](https://github.com/simonw/sqlite-utils/issues/364) I needed to write a test for a [Click](https://click.palletsprojects.com/) app which dealt with input streamed to standard input. I needed to run some assertions during that process, which ruled out the usual [CliRunner.invoke()](https://click.palletsprojects.com/en/8.0.x/testing/) testing tool since that works by running the command until completion.
4
5 I decided to use `subprocess` to run the application. Here's the pattern I came up with for the test:
6 ```python
7 def test_insert_streaming_batch_size_1(db_path):
8 # https://github.com/simonw/sqlite-utils/issues/364
9 # Streaming with --batch-size 1 should commit on each record
46 I realized I needed to call `proc.stdin.flush()` after each write to ensure the write was pushed to the process in a predictable manner.
47
48 At the end of the test, running `proc.stdin.close()` is equivalent to sending an end-of-file, then `proc.wait()` ensures the process has finished and terminated.
til/pytest/treat-warnings-as-errors.md
1 # Treating warnings as errors in pytest
2
3 I was seeing this warning in a Django project when I thought I was correctly using timezone-aware dates everywhere:
5 > RuntimeWarning: DateTimeField Shift.shift_start received a naive datetime (2022-04-01 00:00:00) while time zone support is active
6
7 Running `pytest -Werror` turns those warnings into errors that fail the tests.
8
9 Which means you can investigate them in the Python debugger by running:
10
11 pytest -Werror --pdb -x
12
13 The `--pdb` starts the debugger at the warning (now error) and the `-x` stops the tests after the first failure.
14
15 ## In pytest.ini
16
17 You can also set this in `pytest.ini` - useful if you want ALL warnings to be failures in both development and CI.
18
19 Add the following to the `pytest.ini` file:
20
21 ```ini
22 [pytest]
23 # ...
24 filterwarnings =
31
32 ```ini
33 [pytest]
34 # ...
35 filterwarnings =
til/pytest/syrupy.md
1 # Snapshot testing with Syrupy
2
3 I'm a big fan of snapshot testing - writing tests where you compare the output of some function to a previously saved version, and can re-generate that version from scratch any time something changes.
4
5 I usually do this by hand - I run `pytest -x --pdb` to stop at the first failing test and drop into a debugger, then copy out the representation of the generated value and copy it into the test. I wrote about how I use this pattern a few years ago in [How to cheat at unit tests with pytest and Black](https://simonwillison.net/2020/Feb/11/cheating-at-unit-tests-pytest-black/).
6
7 Today I learned how to do the same thing with the [Syrupy](https://github.com/tophat/syrupy) plugin for [pytest](https://docs.pytest.org/). I think I'll be using this for many of my future projects.
8
9 ## Some initial tests
10
11 I created a `tests/test_stuff.py` file with the following contents:
12
13 ```python
14 def test_one(snapshot):
15 assert "hello" == snapshot
16
17
18 def test_two(snapshot):
19 assert snapshot == {"foo": [1, 2, 3], "bar": {"baz": "qux"}}
20 ```
21 Then I installed both `pytest` and `syrupy`:
22
23 ```bash
24 pip install pytest syrupy
25 ```
26 Now in my parent folder I can run this:
27 ```bash
28 pytest
29 ```
30 And the tests fail:
31 ```
32 tests/test_stuff.py FF [100%]
33
34 ======================================== FAILURES =========================================
35 ________________________________________ test_one _________________________________________
36
37 snapshot = SnapshotAssertion(name='snapshot', num_executions=1)
38
39 def test_one(snapshot):
40 > assert "hello" == snapshot
41 E AssertionError: assert [+ received] == [- snapshot]
42 E Snapshot 'test_one' does not exist!
43 E + 'hello'
44
45 tests/test_stuff.py:2: AssertionError
46 ________________________________________ test_two _________________________________________
47
48 snapshot = SnapshotAssertion(name='snapshot', num_executions=1)
49
50 def test_two(snapshot):
51 > assert snapshot == {"foo": [1, 2, 3], "bar": {"baz": "qux"}}
52 E AssertionError: assert [- snapshot] == [+ received]
53 E Snapshot 'test_two' does not exist!
54 E + dict({
55 E + 'bar':
57 E ...Full output truncated (9 lines hidden), use '-vv' to show
58
59 tests/test_stuff.py:5: AssertionError
60 --------------------------------- snapshot report summary ---------------------------------
61 2 snapshots failed.
62 ================================= short test summary info =================================
63 FAILED tests/test_stuff.py::test_one - AssertionError: assert [+ received] == [- snapshot]
64 FAILED tests/test_stuff.py::test_two - AssertionError: assert [- snapshot] == [+ received]
65 ==================================== 2 failed in 0.05s ====================================
66 ```
67 The snapshots don't exist yet. But I can create them automatically by running this:
68 ```bash
69 pytest --snapshot-update
70 ```
71 Which outputs passing tests along with:
72 ```
73 --------------------------------- snapshot report summary ---------------------------------
75 ==================================== 2 passed in 0.01s ====================================
76 ```
77 And sure enough, there's now a new folder called `tests/__snapshots__` with a file called `test_stuff.ambr` which contains this:
78 ```
79 # serializer version: 1
80 # name: test_one
81 'hello'
82 # ---
83 # name: test_two
84 dict({
85 'bar': dict({
94 # ---
95 ```
96 Running `pytest` again passes, because the snapshots exist and continue to match the test output.
97
98 The serialized snapshot format is designed to be checked into Git. It's pleasantly readable - I can review that and see what it's testing, and I could even update it by hand - though I'll much more likely use the `--snapshot-update` flag and then eyeball the differences.
99
100 ## Adding a dataclass
112
113
114 def test_one(snapshot):
115 assert "hello" == snapshot
116
117
118 def test_two(snapshot):
119 assert snapshot == {"foo": [1, 2, 3], "bar": {"baz": "qux"}}
120
121
122 def test_three(snapshot):
123 assert Foo(1, "hello") == snapshot
124 ```
125 Running `pytest` again failed. `pytest --snapshot-update` passed and updated my snapshot file, adding this to it:
126 ```
127 # name: test_three
128 Foo(bar=1, baz='hello')
129 ```
138 self.bar = bar
139
140 def test_four(snapshot):
141 assert WeirdClass(1, 2) == snapshot
142 ```
143 Serialized to:
144 ```
145 # name: test_four
146 WeirdClass(
147 bar=2,
til/pytest/show-files-opened-by-tests.md
1 # Show files opened by pytest tests
2
3 My test suite for [Datasette](https://github.com/simonw/datasette) has grown so large that running the whole thing sometimes causes me to run out of file handles.
4
5 I've not solved this yet, but I did figure out a pattern to get `pytest` to show me which new files were opened by which tests.
6
7 Add the following to `conftest.py`:
8
9 ```
11
12
13 @pytest.fixture(autouse=True)
14 def check_for_new_file_handles(request):
15 proc = psutil.Process()
22 ```
23
24 This uses [psutil](https://pypi.org/project/psutil/) (`pip install psutil`) to build a set of the open files before and after the test runs. It then uses a list comprehension to figure out which file handles are new.
25
26 Using `@pytest.fixture(autouse=True)` means it will automatically be used for every test.
27
28 It's a `yield` fixture, which means the part of the code before the `yield` statement runs before the test, then the part afterwards runs after the test function has finished.
29
30 Accepting the `request` argument means it gets access to a `pytest` request object, which includes `request.node` which is an object representing the test that is being executed.
31
32 You need to run `pytest -s` to see the output (without the `-s` the output is hidden).
33
34 Example output:
35
36 `tests/test_permissions.py .[popenfile(path='/private/var/folders/wr/hn3206rs1yzgq3r49bz8nvnh0000gn/T/tmp76w2ukin/fixtures.db', fd=8)] opened by <Function test_view_padlock[/-None-200-200]>..`
til/pytest/subprocess-server.md
1 # Start a server in a subprocess during a pytest session
2
3 I wanted to start an actual server process, run it for the duration of my pytest session and shut it down at the end.
4
5 Here's the recipe I came up with. This fixture lives in `conftest.py`:
6
7 ```python
8 import pytest
9 import sqlite_utils
10 import subprocess
11
12 @pytest.fixture(scope="session")
13 def ds_server(tmp_path_factory):
14 db_directory = tmp_path_factory.mktemp("dbs")
15 db_path = db_directory / "test.db"
16 db = sqlite_utils.Database(db_path)
17 insert_test_data(db)
18 ds_proc = subprocess.Popen(
19 [
31 assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8")
32 yield ds_proc
33 # Shut it down at the end of the pytest session
34 ds_proc.terminate()
35 ```
36 A test looks like this:
37 ```python
38 import httpx
39
40 def test_server_starts(ds_server):
41 response = httpx.get("http://127.0.0.1:8041/")
42 assert response.status_code == 200
45 ## Alternative recipe for serving static files
46
47 While [adding tests to Datasette Lite](https://github.com/simonw/datasette-lite/issues/35) I found myself needing to run a localhost server that served static files directly.
48
49 I completely forgot about this TIL, and instead took inspiration [from pytest-simplehttpserver](https://github.com/ppmdo/pytest-simplehttpserver/blob/a82ad31912121c074ff1a76c4628a1c42c32b41b/src/pytest_simplehttpserver/pytest_plugin.py#L17-L28) - coming up with this pattern:
50
51 ```python
52 from subprocess import Popen, PIPE
53 import pathlib
54 import pytest
55 import time
56 from http.client import HTTPConnection
59
60
61 @pytest.fixture(scope="module")
62 def static_server():
63 process = Popen(
85 Again, including `static_server` as a fixture is enough to ensure requests to `http://localhost:8123/` will be served by that temporary server.
86
87 I like how this version polls for a successful HEAD request (a trick inspired by `pytest-simplehttpserver`) rather than just sleeping.
til/pytest/session-scoped-tmp.md
1 # Session-scoped temporary directories in pytest
2
3 I habitually use the `tmpdir` fixture in pytest to get a temporary directory that will be cleaned up after each test, but that doesn't work with `scope="session"` - which can be used to ensure an expensive fixture is run only once per test session and the generated content is used for multiple tests.
4
5 To get a temporary directory that works with `scope="session"`, use the `tmp_path_factory` built-in pytest fixture like this:
6
7 ```python
8 import pytest
9
10
11 @pytest.fixture(scope="session")
12 def template_dir(tmp_path_factory):
13 template_dir = tmp_path_factory.mktemp("page-templates")
19
20
21 def test_about(template_dir):
22 assert "ABOUT!" == (template_dir / "pages" / "about.html").read_text()
23
24
25 def test_request(template_dir):
26 assert "request" == (template_dir / "pages" / "request.html").read_text()
27 ```
28
29 Example: https://github.com/simonw/datasette/blob/1b7b66c465e44025ec73421bd69752e42f108321/tests/test_custom_pages.py#L16-L45
til/pytest/pytest-subprocess.md
1 # Mocking subprocess with pytest-subprocess
2
3 For [apple-notes-to-sqlite](https://github.com/dogsheep/apple-notes-to-sqlite) I needed to write some tests that simulated executing the `osascript` command using the Python `subprocess` module.
4
5 I wanted my tests to run on Linux CI machines, where that command would not exist.
6
7 After failing to use `unittest.mock.patch` to solve this, I went looking for alternatives. I found [pytest-subprocess](https://pypi.org/project/pytest-subprocess/).
8
9 Here's the relevant section of [the test I wrote](https://github.com/dogsheep/apple-notes-to-sqlite/blob/0.1/tests/test_apple_notes_to_sqlite.py):
10
11 ```python
17 """
18
19 def test_apple_notes_to_sqlite_dump(fp):
20 fp.register_subprocess(["osascript", "-e", COUNT_SCRIPT], stdout=b"2")
21 fp.register_subprocess(["osascript", "-e", fp.any()], stdout=FAKE_OUTPUT)
25 # ...
26 ```
27 `fp` is the fixture provided by the package (you need to `pip install pytest-subprocess` for this to work).
28
29 `COUNT_SCRIPT` here is the first of my `osascript` constants. It looks like this (in `cli.py`):
51 I eventually figured that using `fp.any()` was easier than specifying the exact script. This is a wildcard value which matches any string. It returns the full `FAKE_OUTPUT` variable as the simulated standard out.
52
53 What's useful about `pytest-subprocess` is that it works for both `subprocess.check_output()` and more complex `subprocess.Popen()` calls - both of which I was using in this script.
til/pytest/registering-plugins-in-tests.md
1 # Registering temporary pluggy plugins inside tests
2
3 While implementing more finely-grained permissions for `datasette-insert-api` ([issue 8](https://github.com/simonw/datasette-insert-api/issues/8)) I decided I wanted to register a Datasette pluggy plugin for the duration of a single test.
4
5 Here's the pattern I figured out for doing that:
8 from datasette import hookimpl
9 from datasette.plugins import pm
10 import pytest
11
12
13 def test_using_test_plugin():
14 class TestPlugin:
15 __name__ = "TestPlugin"
22 pm.register(TestPlugin(), name="undo")
23 try:
24 # Rest of test goes here
25 finally:
26 pm.unregister(name="undo")
27 ```
28
29 Here's [an example](https://github.com/simonw/datasette-insert/blob/7f4c2b3954190d547619d043bbe714481b10ac1e/tests/test_insert_api.py) of a test that uses a pytest fixture to register (and de-register) a plugin:
30
31 ```python
33 from datasette.app import Datasette
34 from datasette.plugins import pm
35 import pytest
36
37
38 @pytest.fixture
39 def unsafe():
40 class UnsafeInsertAll:
51
52
53 @pytest.mark.asyncio
54 async def test_insert_alter(ds, unsafe):
55 async with httpx.AsyncClient(app=ds.app()) as client:
56 response = await client.post(
til/pytest/pytest-recording-vcr.md
1 # Using VCR and pytest with pytest-recording
2
3 [pytest-recording](https://github.com/kiwicom/pytest-recording) is a neat pytest plugin that makes it easy to use the [VCR library](https://vcrpy.readthedocs.io/), which helps write tests against HTTP resources by automatically capturing responses and baking them into a YAML file to be replayed during the tests.
4
5 It even works with [boto3](https://aws.amazon.com/sdk-for-python/)!
6
7 To use it, first install it with `pip install pytest-recording` and then add the `@pytest.mark.vcr` decorator to a test that makes HTTP calls:
8
9 ```python
10 @pytest.mark.vcr
11 def test_create():
12 runner = CliRunner()
13 with runner.isolated_filesystem():
14 result = runner.invoke(cli, ["create", "pytest-bucket-simonw-1", "-c"])
15 assert result.exit_code == 0
16 ```
17
18 The first time you run the tests, use the `--record-mode=once` option:
19
20 pytest -k test_create --record-mode=once
21
22 This defaults to creating a YAML file in `tests/cassettes/test_s3_credentials/test_create.yaml`.
23
24 Subsequent runs of `pytest -k test_create` will reuse those recorded HTTP requests and will not make any network requests - I confirmed this by turning off my laptop's WiFi.
til/pytest/pytest-code-coverage.md
1 # Code coverage using pytest and codecov.io
2
3 I got my [asgi-csrf](https://github.com/simonw/asgi-csrf) Python package up to 100% code coverage. Here's [the pull request](https://github.com/simonw/asgi-csrf/issues/13).
4
5 I started by installing and using the [pytest-cov](https://pypi.org/project/pytest-cov/) pytest plugin.
6
7 ```
8 pip install pytest-cov
9 pytest --cov=asgi_csrf
10 ```
11 This shows the current code coverage percentage for the `asgi_csrf` module in the terminal output:
13 collected 18 items
14
15 test_asgi_csrf.py .................. [100%]
16
17 ---------- coverage: platform darwin, python 3.7.3-final-0 -----------
23 ========= 18 passed in 0.37s =========
24 ```
25 To generate an HTML report showing which lines are not covered by tests:
26 ```
27 pytest --cov=asgi_csrf --cov-report=html
28 open htmlcov/index.html
29 ```
30 Here's a hosted copy of that report: https://asgi-csrf-htmlcov-ewca4t9se.vercel.app/asgi_csrf_py.html
31
32 ## Failing the tests if coverage is below a certain threshold
33
34 The `--cov-fail-under=100` option does this:
35
36 ```
37 pytest --cov-fail-under=100 --cov asgi_csrf
38 ======= test session starts =======
39 platform darwin -- Python 3.7.3, pytest-6.0.1, py-1.9.0, pluggy-0.13.1
40 rootdir: /Users/simon/Dropbox/Development/asgi-csrf
41 plugins: cov-2.10.1, asyncio-0.14.0
42 collected 18 items
43
44 test_asgi_csrf.py .................. [100%]
45
46 ---------- coverage: platform darwin, python 3.7.3-final-0 -----------
49 asgi_csrf.py 169 13 92%
50
51 FAIL Required test coverage of 100% not reached. Total coverage: 92.31%
52 ```
53 I added this to my [GitHub test action](https://github.com/simonw/asgi-csrf/blob/83d2b4f6bb034b746fd3f20f57ebdbaeae007a73/.github/workflows/test.yml#L27-L29):
54 ```yaml
55 - name: Run tests
56 run: |
57 pytest --cov-fail-under=100 --cov asgi_csrf
58 ```
59 ## Pushing results to codecov.io
61 https://codecov.io/ offers free coverage reporting for open source projects. I authorized it against my GitHub account, then enabled it for the `asgi-csrf` project by navigating to https://codecov.io/gh/simonw/asgi-csrf (hacking the URL saves you from having to paginate through all of your repos looking for the right one).
62
63 codecov.io gives you a token - set that as a GitHub repository secret as `CODECOV_TOKEN` - then add the following to the test action configuration:
64 ```yaml
65 - name: Upload coverage to codecov.io
69 CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
70 ```
71 This will upload your coverage report (no matter if the previous test step failed or succeeded). codecove.io then reports back to pull requests and maintains a dashboard for your project.
72
73 codecov.io doesn't detect if you use a `main` or `master` branch so I had to switch the default branch in the settings at https://codecov.io/gh/simonw/asgi-csrf/settings
til/pytest/pytest-argparse.md
1 # Writing pytest tests against tools written with argparse
2
3 I usually build command-line tools using [Click](https://click.palletsprojects.com/) (and my [click-app](https://github.com/simonw/click-app) cookiecutter template), which includes a really nice [set of tools](https://click.palletsprojects.com/en/8.0.x/testing/) for writing tests.
4
5 Today I decided to try building a tool called [stream-delay](https://github.com/simonw/stream-delay) using [argparse]() from the Python standard library, since it didn't need any other dependencies.
6
7 The one challenge I had was how to write the tests. I used [pytest](https://pytest.org/) as a test-only dependency.
8
9 Here's the pattern I came up with, using the [capsys pytest fixture](https://docs.pytest.org/en/6.2.x/capture.html) to capture standard output from my tool.
10
11 ```python
12 from stream_delay import main
13 import pytest
14
15 @pytest.mark.parametrize("option", ("-h", "--help"))
16 def test_help(capsys, option):
17 try:
18 main([option])
38 # ...
39 ```
40 As you can see, `main()` takes an optional list of arguments. The default for that is `None` which will cause `argparse` to read `sys.argv` - but I can inject arguments to the function from my tests if I need to.
41
42 I'm catching the `SystemExit` exception because this will be raised by default if you use `-h` or `--help` - but I still want to finish my test execution so I can inspect the captured output.
43
44 Complete code:
45
46 - [stream_delay.py](https://github.com/simonw/stream-delay/blob/0.1/stream_delay.py)
47 - [tests/test_stream_delay.py](https://github.com/simonw/stream-delay/blob/0.1/tests/test_stream_delay.py)
til/pytest/pytest-mock-calls.md
1 # Quick and dirty mock testing with mock_calls
2
3 I needed to write a test that checked for a really complex sequence of mock calls for [s3-credentials#3](https://github.com/simonw/s3-credentials/issues/3).
4
5 I ended up using the following trick, using [pytest-mock](https://pypi.org/project/pytest-mock/):
6
7 ```python
8 def test_create(mocker):
9 boto3 = mocker.patch("boto3.client")
10 runner = CliRunner()
11 with runner.isolated_filesystem():
12 result = runner.invoke(cli, ["create", "pytest-bucket-simonw-1", "-c"])
13 assert [str(c) for c in boto3.mock_calls] == [
14 "call('s3')",
15 "call('iam')",
16 "call().head_bucket(Bucket='pytest-bucket-simonw-1')",
17 "call().get_user(UserName='s3.read-write.pytest-bucket-simonw-1')",
18 'call().put_user_policy(PolicyDocument=\'{"Version": "2012-10-17", "Statement": [{"Sid": "ListObjectsInBucket", "Effect": "Allow", "Action": ["s3:ListBucket"], "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1"]}, {"Sid": "AllObjectActions", "Effect": "Allow", "Action": "s3:*Object", "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1/*"]}]}\', PolicyName=\'s3.read-write.pytest-bucket-simonw-1\', UserName=\'s3.read-write.pytest-bucket-simonw-1\')',
19 "call().create_access_key(UserName='s3.read-write.pytest-bucket-simonw-1')",
20 "call().create_access_key().__getitem__('AccessKey')",
21 "call().create_access_key().__getitem__().__str__()",
22 ]
23 ```
24 I used the trick I describe in [How to cheat at unit tests with pytest and Black](https://simonwillison.net/2020/Feb/11/cheating-at-unit-tests-pytest-black/) where I run that comparison against an empty `[]` list, then use `pytest --pdb` to drop into a debugger and copy and paste the output of `[str(c) for c in boto3.mock_calls]` into my test code.
25
26 Initially I used a comparison directly against `boto3.mock_calls` - but this threw a surprising error. The calls sequence I baked into my tests looked like this:
27
28 ```python
29 from unittest.mock import call
30
31 # ...
34 call("s3"),
35 call("iam"),
36 call().head_bucket(Bucket="pytest-bucket-simonw-1"),
37 call().get_user(UserName="s3.read-write.pytest-bucket-simonw-1"),
38 call().put_user_policy(
39 PolicyDocument='{"Version": "2012-10-17", "Statement": [{"Sid": "ListObjectsInBucket", "Effect": "Allow", "Action": ["s3:ListBucket"], "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1"]}, {"Sid": "AllObjectActions", "Effect": "Allow", "Action": "s3:*Object", "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1/*"]}]}',
40 PolicyName="s3.read-write.pytest-bucket-simonw-1",
41 UserName="s3.read-write.pytest-bucket-simonw-1",
42 ),
43 call().create_access_key(UserName="s3.read-write.pytest-bucket-simonw-1"),
44 call().create_access_key().__getitem__("AccessKey"),
45 call().create_access_key().__getitem__().__str__(),
46 ]
47 ```
48 But when I ran `pytest` that last one failed:
49 ```
50 E - 'call().create_access_key().__getitem__()',
53 E ? ^^^^^^^^^^
54 ```
55 It turns out `__str__()` calls do not play well with the `call()` constructor - see [this StackOverflow question](https://stackoverflow.com/questions/61926147/how-to-represent-unittest-mock-call-str).
56
57 My solution was to cast them all to `str()` using a list comprehension, which ended up fixing that problem.
59 ## Gotcha: parameter ordering
60
61 There's one major flaw to the `str()` trick I'm using here: the order in which parameters are displayed in the string representation of `call()` may differ between Python versions. I had to undo this trick in one place I was using it ([see here](https://github.com/simonw/s3-credentials/issues/8)) as a result due to the following test failure:
62
63 ```
til/pytest/playwright-pytest.md
1 # Using pytest and Playwright to test a JavaScript web application
2
3 I [decided to add](https://github.com/simonw/datasette-lite/issues/35) automated tests to my [Datasette Lite](https://simonwillison.net/2022/May/4/datasette-lite/) project. Datasette Lite bundles my Datasette Python web application as a client-side application running inside WebAssembly using Pyodide.
4
5 I wrote the tests using [playwright-pytest](https://github.com/microsoft/playwright-pytest), which lets you write tests in Python using Microsoft's [Playwright](https://playwright.dev/) browser automation library.
6
7 ## Installing playwright-pytest
8
9 Two steps:
10
11 pip install playwright-pytest
12
13 Then a second step to install the browsers used by Playwright itself:
15 playwright install
16
17 I had those browsers installed already, but I still needed to run that command since the updated version of `playwright-pytest` needed more recent versions.
18
19 (I had limited internet while doing this, and discovered that you can trick Playwright into using an older browser version by renaming a folder in `~/Library/Caches/ms-playwright` to the one that shows up in the error message that says that the browsers cannot be found.)
20
21 ## An initial test
22
23 The first test I wrote looked like this, saved in `test_app.py`:
24
25 ```python
26 from playwright.sync_api import Page, expect
27
28 def test_initial_load(page: Page):
29 page.goto("https://lite.datasette.io/")
30 loading = page.locator("#loading-indicator")
38 ```
39
40 Then run the test by running this in the same directory as that file:
41
42 pytest
43
44 `playwright-pytest` provides the `page` fixture - annotating it with `: Page` is optional but if you do that then VS Code knows what it is and can provide autocomplete in the editor.
45
46 `page.goto()` causes the browser to navigate to that URL.
53 expect(loading).to_have_css("display", "block")
54 ```
55 The `expect()` helper function encapsulates the concept of polling the page to wait for something to become true within a time limit. This is the key technique for avoiding "flaky" tests when working with Playwright.
56
57
58 The assertions [are listed here](https://playwright.dev/python/docs/test-assertions).
59
60 You don't actually need to use `expect()` though - that's useful if you don't know how long it will take for the page to load, but if you know the page is already loaded you can write assertions like this instead:
67 ```
68
69 ## pytest options
70
71 The `playwright-pytest` package adds a bunch of new options to `pytest`. The most useful is `--headed`:
72
73 pytest --headed
74
75 This runs the tests in "headed" mode - which means a visible browser window pops up during the tests so you can see what is happening.
76
77 `--browser firefox` runs them using Firefox instead of Chromium.
78
79 `--tracing on` is really interesting: it generates a trace ZIP file which you can then open using https://trace.playwright.dev/ to explore a detailed trace of the test as it executed.
80
81 `--video on` records a video (as a `.webm` file) of the test. I've not tried it yet, but `--video retain-on-failure` only keeps that video if the test fails.
82
83 Here's [documentation on all of the options](https://playwright.dev/python/docs/test-runners).
84
85 ## Running a localhost static server during the tests
86
87 I wanted to run the tests against the most recent version of my code, which consists of an `index.html` file and a `webworker.js` file. Because these use web workers they need to be run from an actual localhost web server, so I needed to start one at the beginning of the tests and shut it down at the end.
88
89 I wrote about my solution for this in another TIL: [Start a server in a subprocess during a pytest session](https://til.simonwillison.net/pytest/subprocess-server).
90
91 ## My test suite so far
92
93 Here's where [I've got to so far](https://github.com/simonw/datasette-lite/blob/daba69708c7a72adad20dce3c534b9a399ef11c8/tests/test_datasette_lite.py):
94
95 ```python
97 from subprocess import Popen, PIPE
98 import pathlib
99 import pytest
100 import time
101 from http.client import HTTPConnection
104
105
106 @pytest.fixture(scope="module")
107 def static_server():
108 process = Popen(
129
130
131 @pytest.fixture(scope="module")
132 def dslite(static_server, browser: Browser) -> Page:
133 page = browser.new_page()
140
141
142 def test_initial_load(dslite: Page):
143 expect(dslite.locator("#loading-indicator")).to_have_css("display", "none")
144
145
146 def test_has_two_databases(dslite: Page):
147 assert [el.inner_text() for el in dslite.query_selector_all("h2")] == [
148 "fixtures",
151
152
153 def test_navigate_to_database(dslite: Page):
154 h2 = dslite.query_selector("h2")
155 assert h2.inner_text() == "fixtures"
174 ## Running it in GitHub Actions
175
176 Here's the [GitHub Actions workflow](https://github.com/simonw/datasette-lite/blob/main/.github/workflows/test.yml) I'm using to run the tests:
177
178 ```yaml
187
188 jobs:
189 test:
190 runs-on: ubuntu-latest
191 steps:
192 - uses: actions/checkout@v3
206 pip install -r dev-requirements.txt
207 playwright install
208 - name: Run test
209 run: |
210 pytest
211 ```
212 [dev-requirements.txt](https://raw.githubusercontent.com/simonw/datasette-lite/main/dev-requirements.txt) contains this:
213 ```
214 pytest-playwright==0.3.0
215 playwright==1.24.0
216 ```
til/pytest/only-run-integration.md
1 # Opt-in integration tests with pytest --integration
2
3 For both [s3-credentials](https://github.com/simonw/s3-credentials) and [datasette-publish-fly](https://github.com/simonw/datasette-publish-fly) I have a need for real-world integration tests that actually interact with the underlying APIs (AWS or Fly) to create and destroy resources on those platforms.
4
5 Most of the time I want my tests to run without doing these. I want the option to run `pytest --integration` to opt-in to running those extra integration tests.
6
7 Here's the pattern I'm using. First, in `tests/conftest.py`:
8
9 ```python
10 import pytest
11
12
13 def pytest_addoption(parser):
14 parser.addoption(
15 "--integration",
16 action="store_true",
17 default=False,
18 help="run integration tests",
19 )
20
21
22 def pytest_configure(config):
23 config.addinivalue_line(
24 "markers",
25 "integration: mark test as integration test, only run with --integration",
26 )
27
28
29 def pytest_collection_modifyitems(config, items):
30 if config.getoption("--integration"):
31 # Also run integration tests
32 return
33 skip_integration = pytest.mark.skip(reason="use --integration option to run")
34 for item in items:
35 if "integration" in item.keywords:
36 item.add_marker(skip_integration)
37 ```
38 This implements a `@pytest.mark.integration` marker which I can use to mark any test that should be considered part of the integration test suite. These will be skipped by default... but will not be skipped if the `--integration` option is passed to `pytest`.
39
40 Then in the tests I can either do this:
41
42 ```python
43 @pytest.mark.integration
44 def test_integration_s3():
45 # ...
46 ```
47 Or if I have a module that only contains integration tests - `tests/test_integration.py` - I can use the following line to apply that decorator to every test in the module:
48 ```python
49 import pytest
50
51 pytestmark = pytest.mark.integration
52
53 def test_integration_s3():
54 # ...
55 ```
til/pytest/mocking-boto.md
1 # Mocking a Textract LimitExceededException with boto
2
3 For [s3-ocr issue #21](https://github.com/simonw/s3-ocr/issues/21) I needed to write a test that simulates what happens when Amazon Textract returns a "LimitExceededException". When using boto this error presents itself as an exception:
4
5 > `botocore.errorfactory.LimitExceededException: An error occurred (LimitExceededException) when calling the StartDocumentTextDetection operation: Open jobs exceed maximum concurrent job limit`
6
7 I use [moto](https://github.com/spulec/moto) to simulate AWS in that test suite, but moto does not yet have a mechanism for simulating Textract errors like this one.
8
9 I ended up turning to Python mocks, here provided by the [pytest-mock](https://pypi.org/project/pytest-mock/) fixture. Here's the test I came up with:
10
11 ```python
12 def test_limit_exceeded_automatic_retry(s3, mocker):
13 mocked = mocker.patch("s3_ocr.cli.start_document_text_extraction")
14 # It's going to fail the first time, then succeed
33 ```python
34 def start_document_text_extraction(textract, **kwargs):
35 # Wrapper function to make this easier to mock in tests
36 return textract.start_document_text_detection(**kwargs)
37 ```
38 The most confusing thing about working with Python mocks is figuring out the string to use to mock the right piece of code. I like this pattern of refactoring the code under test to make it as simple to mock as possible.
39
40 The code I am testing here implements automatic retries. As such, I needed the API method I am simulating to fail the first time and then succeed the second time.
41
42 Originally I had done this with a `side_effect()` function - see below - but then [@szotten on Twitter](https://twitter.com/szotten/status/1556337221258575873) pointed out that you can instead set `mock.side_effect` to a list and it will [cycle through those items in turn](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.Mock.side_effect):
43
44 ```python
til/pytest/mock-httpx.md
1 # How to mock httpx using pytest-mock
2
3 I wrote this test to exercise some [httpx](https://pypi.org/project/httpx/) code today, using [pytest-mock](https://pypi.org/project/pytest-mock/).
4
5 The key was to use `mocker.patch.object(cli, "httpx")` which patches the `httpx` module that was imported by the `cli` module.
6
7 Here the `mocker` function argument is a fixture that is provided by `pytest-mock`.
8
9 ```python
10 from conditional_get import cli
11 from click.testing import CliRunner
12
13
14 def test_performs_conditional_get(mocker):
15 m = mocker.patch.object(cli, "httpx")
16 m.get.return_value = mocker.Mock()
39 )
40 ```
41 https://github.com/simonw/conditional-get/blob/485fab46f01edd99818b829e99765ed9ce0978b5/tests/test_cli.py
42
43 ## Mocking a JSON response
45 Here's a mock for a GraphQL POST request that returns JSON:
46 ```python
47 @pytest.fixture
48 def mock_graphql_region(mocker):
49 m = mocker.patch("datasette_publish_fly.httpx")
52 m.post.return_value.json.return_value = {"data": {"nearestRegion": {"code": "sjc"}}}
53 ```
54 https://github.com/simonw/datasette-publish-fly/blob/5253220bded001e94561e215d553f352838e7a1c/tests/test_publish_fly.py#L16-L21
55
56 ## Mocking httpx.stream
67 https://stackoverflow.com/a/6112456 helped me figure out the following:
68 ```python
69 def test_performs_conditional_get(mocker):
70 m = mocker.patch.object(cli, "httpx")
71 m.stream.return_value.__enter__.return_value = mocker.Mock()
75 ]
76 ```
77 https://github.com/simonw/conditional-get/blob/80454f972d39e2b418572d7938146830fab98fa6/tests/test_cli.py
78
79 ## Mocking an HTTP error triggered by response.raise_for_status()
81 The `response.raise_for_status()` raises an exception if an HTTP error (e.g. a 404 or 500) occurred.
82
83 Here's how I [mocked that to return an error](https://github.com/simonw/airtable-to-yaml/blob/ebd94b2e29d6f2ec3dc64d161495a759330027e8/tests/test_airtable_to_yaml.py#L43-L56):
84
85 ```python
86 def test_airtable_to_yaml_error(mocker):
87 m = mocker.patch.object(cli, "httpx")
88 m.get.return_value = mocker.Mock()
til/pytest/coverage-with-context.md
1 # pytest coverage with context
2
3 [This tweet](https://twitter.com/mariatta/status/1499863816489734146) from \@Mariatta tipped me off to the ability to measure "contexts" when [running coverage](https://coverage.readthedocs.io/en/6.3.2/contexts.html#context-reporting) - as a way to tell which tests exercise which specific lines of code.
4
5 My [sqlite-utils](https://github.com/simonw/sqlite-utils) project uses `pytest` for the test suite. I decided to figure out how to get this working with [pytest-cov](https://pypi.org/project/pytest-cov/).
6
7 After some experimentation, this is the recipe that worked for me:
8
9 ```
10 # In the virtual environment, make sure pytest-cov is installed:
11 % pip install pytest-cov
12 # First, run pytest to calculate coverage of the `sqlite_utils` package, with context
13 % pytest --cov=sqlite_utils --cov-context=test
14 # The .coverage file is actually a SQLite database:
15 % ls -lah .coverage
23 Here's what one of the pages looks like, displaying the context for some lines of code:
24
25 ![The code has an expandable section which reveals which tests executed each individual line.](https://user-images.githubusercontent.com/9599/156860441-66e35994-653a-4ab7-b690-4d901fc57750.png)
26
27 ## The .coverage schema
til/pytest/async-fixtures.md
1 # Async fixtures with pytest-asyncio
2
3 I wanted to use a fixture with `pytest-asyncio` that was itself an `async def` function, so that it could execute `await` statements.
4
5 Since I'm using a `pytest.ini` file containing `asyncio_mode = strict` I had to use the `@pytest_asyncio.fixture` fixture to get this to work. Without that fixture I got this error:
6
7 ```
9 E AssertionError: assert False
10 E + where False = _has_explicit_asyncio_mark(<function ds_with_route at 0x11332d2d0>)
11 E + where <function ds_with_route at 0x11332d2d0> = <FixtureDef argname='ds_with_route' scope='function' baseid='tests/test_routes.py'>.func
12 ```
13
14 Swapping `@pytest.fixture` for `@pytest_asyncio.fixture` fixed this problem:
15
16 ```python
17 import pytest_asyncio
18
19 @pytest_asyncio.fixture
20 async def ds_with_route():
21 ds = Datasette()
til/pypi/pypi-releases-from-github.md
53 ## Configuring the workflow
54
55 This took me the most time to figure out. I already have a `publish.yml` workflow I use for my other projects, which uses `twine` and a PyPI token to upload packages, after first running the tests.
56
57 Here's [the workflow](https://github.com/datasette/datasette-build/blob/main/.github/workflows/publish.yml) I eventually landed, in `.github/workflows/publish.yml`:
65
66 jobs:
67 test:
68 runs-on: ubuntu-latest
69 strategy:
70 matrix:
80 - name: Install dependencies
81 run: |
82 pip install -e '.[test]'
83 - name: Run tests
84 run: |
85 pytest
86 deploy:
87 runs-on: ubuntu-latest
88 needs: [test]
89 environment: release
90 permissions:
107 uses: pypa/gh-action-pypi-publish@release/v1
108 ```
109 The `test` job is pretty standard - it sets up a matrix to run the tests against multiple Python versions, then runs `pytest`.
110
111 It's set to trigger by this block:
123 ```yaml
124 deploy: