diff --git a/.gitignore b/.gitignore
index 894a44c..ff9fefd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,5 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+log/
diff --git a/README.md b/README.md
index f2b3948..1b504f2 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,12 @@
 # finetune-transformer-lm
 Code and model for the paper "Improving Language Understanding by Generative Pre-Training"
 
+Before running this code, you need to:
+1. Run `pip install -r requirements.txt`. If you lack a GPU, see the comments in `requirements.txt` for the necessary modification.
+2. Run `python -m spacy download en` to download spaCy's English model.
+3. Export the "val set" and "test set" from the ROCStories corpus (see below) as CSV files,
+    keeping the default filenames, and place them in a `data` subdirectory of this repository.
+
 Currently this code implements the ROCStories Cloze Test result reported in the paper by running:
 `python train.py --dataset rocstories --desc rocstories --submit --analysis --data_dir [path to data here]`
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..694363e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+# Older versions of these dependencies may work; the minimums below are
+# simply the newest versions available at the time this file was made.
+joblib>=0.12.5
+numpy>=1.15.4
+# If you don't have a GPU, replace `tensorflow-gpu` below with `tensorflow`.
+tensorflow-gpu>=1.11.0
+tqdm>=4.28.1
+scikit-learn>=0.19.2
+pandas>=0.23.4
+ftfy>=5.5.0
+spacy>=2.0.16