diff --git a/README.md b/README.md index b9374a2..b56bd32 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,39 @@ -# Missing data, Speculative Reading code/data +# Replication code and data for "Missing Data, Speculative Reading" This repository provides replication code and data for **Missing Data, Speculative Reading** article. -## contents -- [data](data) -- [missing data](missing-data) -- [speculative reading](speculative-reading) +## Contents +- [data](data) - data specific to this article and source data from the [Shakespeare and Company Project](https://shakespeareandco.princeton.edu/) +- [missing_data](missing_data) - code notebooks for the missing data portion of the article +- [speculative_reading](speculative_reading) - code notebooks for the speculative reading portion of the article +- [appendix](appendix) - additional notebooks with validation, alternate approaches, etc.; work that did not make it into the article +- [figures](figures) - exported versions of the article's code-generated figures, in multiple formats where supported +- [utils](utils) - utility Python code used by multiple notebooks + +## Installing dependencies and running code + +This code has been tested against **Python 3.9**. + +To run the code, first clone or download the repository. + +Python dependencies are documented in `requirements.txt`, with pinned versions in `requirements.lock`. We recommend using +a Python virtual environment. Dependencies can be installed with pip: + +```sh +pip install -r requirements.lock +``` + +There are unit tests for some utility code, which include checks that data files +are available at the expected locations. To run them, install and run pytest: + +```sh +pip install pytest +pytest +``` + +Code notebooks can be run using jupyter-lab or a jupyter-aware IDE such as VS Code. 
+ + + -*NOTE: some code and data files have been copied from a different repository that includes work beyond the scope of this article; some paths and file references may not match exactly in this version.* \ No newline at end of file diff --git a/missing_data/MissingBooks.ipynb b/missing_data/MissingBooks.ipynb index 3fddc5c..3bbd450 100644 --- a/missing_data/MissingBooks.ipynb +++ b/missing_data/MissingBooks.ipynb @@ -24,19 +24,6 @@ "from utils.missing_data_processing import get_preprocessed_data\n" ] }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# TEMPORARY, remove me\n", - "from importlib import reload\n", - "import utils.missing_data_processing \n", - "reload(utils.missing_data_processing)\n", - "from utils.missing_data_processing import get_preprocessed_data\n" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -700,27 +687,6 @@ "borrows[['start_date', 'end_date', 'member_names', 'item_title']].head(10)" ] }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import copia\n", - "reload(copia)" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ed644da --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ +# pyproject.toml +[tool.pytest.ini_options] +minversion = "8.1" +pythonpath = "." 
diff --git a/requirements.lock b/requirements.lock new file mode 100644 index 0000000..763538a --- /dev/null +++ b/requirements.lock @@ -0,0 +1,172 @@ +altair==4.2.2 +altair-data-server==0.4.1 +altair-saver==0.5.0 +altair-viewer==0.4.0 +anyconfig==0.13.0 +anyio==4.3.0 +appnope==0.1.4 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +arrow==1.3.0 +asttokens==2.4.1 +async-generator==1.10 +async-lru==2.0.4 +attrs==22.2.0 +Babel==2.14.0 +beautifulsoup4==4.12.3 +binpickle==0.3.4 +bleach==6.1.0 +certifi==2022.12.7 +cffi==1.15.1 +chardet==5.2.0 +charset-normalizer==3.0.1 +click==8.1.7 +cmdstanpy==1.2.2 +comm==0.2.2 +commonmark==0.9.1 +contourpy==1.2.1 +copia==0.1.4 +coverage==7.5.0 +cryptography==39.0.1 +csr==0.5.1 +cycler==0.12.1 +DateTimeRange==2.2.1 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +docopt==0.6.2 +entrypoints==0.4 +exceptiongroup==1.1.0 +executing==2.0.1 +fastjsonschema==2.19.1 +fonttools==4.51.0 +fqdn==1.5.1 +great-tables==0.5.0 +h11==0.14.0 +holidays==0.46 +htmltools==0.5.1 +httpcore==1.0.5 +httpx==0.27.0 +idna==3.4 +importlib_metadata==7.1.0 +importlib_resources==6.4.0 +iniconfig==2.0.0 +ipykernel==6.29.4 +ipython==8.18.1 +ipywidgets==8.1.2 +isoduration==20.11.0 +jedi==0.19.1 +Jinja2==3.1.2 +json5==0.9.24 +jsonpointer==2.4 +jsonschema==4.21.1 +jsonschema-specifications==2023.12.1 +jupyter==1.0.0 +jupyter-console==6.6.3 +jupyter-events==0.10.0 +jupyter-lsp==2.2.5 +jupyter_client==8.6.1 +jupyter_core==5.7.2 +jupyter_server==2.14.0 +jupyter_server_terminals==0.5.3 +jupyterlab==4.1.6 +jupyterlab_pygments==0.3.0 +jupyterlab_server==2.26.0 +jupyterlab_widgets==3.0.10 +kiwisolver==1.4.5 +lenskit==0.14.4 +llvmlite==0.41.1 +MarkupSafe==2.1.2 +matplotlib==3.7.0 +matplotlib-inline==0.1.6 +mbstrdecoder==1.1.3 +mistune==3.0.2 +mpmath==1.3.0 +msgpack==1.0.8 +nbclient==0.10.0 +nbconvert==7.16.3 +nbformat==5.10.4 +nest-asyncio==1.6.0 +notebook==7.1.2 +notebook_shim==0.2.4 +numba==0.58.1 +numpy==1.24.2 +outcome==1.2.0 +overrides==7.7.0 +packaging==23.0 
+pandas==2.2.2 +pandocfilters==1.5.1 +papermill==2.5.0 +parso==0.8.4 +pexpect==4.9.0 +pillow==10.3.0 +platformdirs==4.2.0 +pluggy==1.5.0 +portpicker==1.5.2 +powerlaw==1.5 +prometheus_client==0.20.0 +prompt-toolkit==3.0.43 +prophet==1.1.5 +psutil==5.9.4 +ptitprince==0.2.7 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pycparser==2.21 +Pygments==2.17.2 +pyOpenSSL==23.0.0 +pyparsing==3.1.2 +pyrsistent==0.19.3 +PySocks==1.7.1 +pytest==8.1.1 +pytest-cov==5.0.0 +python-dateutil==2.8.2 +python-dotenv==0.21.1 +python-json-logger==2.0.7 +pytz==2022.7.1 +PyYAML==6.0.1 +pyzmq==25.1.2 +qtconsole==5.5.1 +QtPy==2.4.1 +referencing==0.34.0 +requests==2.31.0 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rpds-py==0.18.0 +scipy==1.13.0 +seaborn==0.11.0 +seedbank==0.1.3 +selenium==4.2.0 +Send2Trash==1.8.3 +six==1.16.0 +sniffio==1.3.0 +sortedcontainers==2.4.0 +soupsieve==2.5 +stack-data==0.6.3 +stanio==0.5.0 +tenacity==8.2.3 +terminado==0.18.1 +tinycss2==1.2.1 +tomli==2.0.1 +toolz==0.12.0 +tornado==6.2 +tqdm==4.64.1 +traitlets==5.14.2 +treon==0.1.4 +trio==0.22.0 +trio-websocket==0.9.2 +typepy==1.3.2 +types-python-dateutil==2.9.0.20240316 +typing_extensions==4.11.0 +tzdata==2024.1 +uri-template==1.3.0 +urllib3==1.26.14 +urllib3-secure-extra==0.1.0 +vl-convert-python==1.3.0 +wcwidth==0.2.13 +webcolors==1.13 +webdriver-manager==3.8.5 +webencodings==0.5.1 +websocket-client==1.7.0 +widgetsnbextension==4.0.10 +wsproto==1.2.0 +zipp==3.18.1