4 年之前 · d29bcf9353
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1 @@
 
				+dat/*.tsv.gz filter=lfs diff=lfs merge=lfs -text
			
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,487 @@
 
				+
			
 
				+# Custom ignore rules
			
 
				+
			
 
				+dat/*
			
 
				+!dat/*.tsv.gz
			
 
				+doc/projectregistration2022/projectregistration.pdf
			
 
				+
			
 
				+
			
 
				+# Created by https://www.toptal.com/developers/gitignore/api/latex,jupyternotebooks,python
			
 
				+# Edit at https://www.toptal.com/developers/gitignore?templates=latex,jupyternotebooks,python
			
 
				+
			
 
				+### JupyterNotebooks ###
			
 
				+# gitignore template for Jupyter Notebooks
			
 
				+# website: http://jupyter.org/
			
 
				+
			
 
				+.ipynb_checkpoints
			
 
				+*/.ipynb_checkpoints/*
			
 
				+
			
 
				+# IPython
			
 
				+profile_default/
			
 
				+ipython_config.py
			
 
				+
			
 
				+# Remove previous ipynb_checkpoints
			
 
				+#   git rm -r .ipynb_checkpoints/
			
 
				+
			
 
				+### LaTeX ###
			
 
				+## Core latex/pdflatex auxiliary files:
			
 
				+*.aux
			
 
				+*.lof
			
 
				+*.log
			
 
				+*.lot
			
 
				+*.fls
			
 
				+*.out
			
 
				+*.toc
			
 
				+*.fmt
			
 
				+*.fot
			
 
				+*.cb
			
 
				+*.cb2
			
 
				+.*.lb
			
 
				+
			
 
				+## Intermediate documents:
			
 
				+*.dvi
			
 
				+*.xdv
			
 
				+*-converted-to.*
			
 
				+# these rules might exclude image files for figures etc.
			
 
				+# *.ps
			
 
				+# *.eps
			
 
				+# *.pdf
			
 
				+
			
 
				+## Generated if empty string is given at "Please type another file name for output:"
			
 
				+.pdf
			
 
				+
			
 
				+## Bibliography auxiliary files (bibtex/biblatex/biber):
			
 
				+*.bbl
			
 
				+*.bcf
			
 
				+*.blg
			
 
				+*-blx.aux
			
 
				+*-blx.bib
			
 
				+*.run.xml
			
 
				+
			
 
				+## Build tool auxiliary files:
			
 
				+*.fdb_latexmk
			
 
				+*.synctex
			
 
				+*.synctex(busy)
			
 
				+*.synctex.gz
			
 
				+*.synctex.gz(busy)
			
 
				+*.pdfsync
			
 
				+
			
 
				+## Build tool directories for auxiliary files
			
 
				+# latexrun
			
 
				+latex.out/
			
 
				+
			
 
				+## Auxiliary and intermediate files from other packages:
			
 
				+# algorithms
			
 
				+*.alg
			
 
				+*.loa
			
 
				+
			
 
				+# achemso
			
 
				+acs-*.bib
			
 
				+
			
 
				+# amsthm
			
 
				+*.thm
			
 
				+
			
 
				+# beamer
			
 
				+*.nav
			
 
				+*.pre
			
 
				+*.snm
			
 
				+*.vrb
			
 
				+
			
 
				+# changes
			
 
				+*.soc
			
 
				+
			
 
				+# comment
			
 
				+*.cut
			
 
				+
			
 
				+# cprotect
			
 
				+*.cpt
			
 
				+
			
 
				+# elsarticle (documentclass of Elsevier journals)
			
 
				+*.spl
			
 
				+
			
 
				+# endnotes
			
 
				+*.ent
			
 
				+
			
 
				+# fixme
			
 
				+*.lox
			
 
				+
			
 
				+# feynmf/feynmp
			
 
				+*.mf
			
 
				+*.mp
			
 
				+*.t[1-9]
			
 
				+*.t[1-9][0-9]
			
 
				+*.tfm
			
 
				+
			
 
				+#(r)(e)ledmac/(r)(e)ledpar
			
 
				+*.end
			
 
				+*.?end
			
 
				+*.[1-9]
			
 
				+*.[1-9][0-9]
			
 
				+*.[1-9][0-9][0-9]
			
 
				+*.[1-9]R
			
 
				+*.[1-9][0-9]R
			
 
				+*.[1-9][0-9][0-9]R
			
 
				+*.eledsec[1-9]
			
 
				+*.eledsec[1-9]R
			
 
				+*.eledsec[1-9][0-9]
			
 
				+*.eledsec[1-9][0-9]R
			
 
				+*.eledsec[1-9][0-9][0-9]
			
 
				+*.eledsec[1-9][0-9][0-9]R
			
 
				+
			
 
				+# glossaries
			
 
				+*.acn
			
 
				+*.acr
			
 
				+*.glg
			
 
				+*.glo
			
 
				+*.gls
			
 
				+*.glsdefs
			
 
				+*.lzo
			
 
				+*.lzs
			
 
				+*.slg
			
 
				+*.slo
			
 
				+*.sls
			
 
				+
			
 
				+# uncomment this for glossaries-extra (will ignore makeindex's style files!)
			
 
				+# *.ist
			
 
				+
			
 
				+# gnuplot
			
 
				+*.gnuplot
			
 
				+*.table
			
 
				+
			
 
				+# gnuplottex
			
 
				+*-gnuplottex-*
			
 
				+
			
 
				+# gregoriotex
			
 
				+*.gaux
			
 
				+*.glog
			
 
				+*.gtex
			
 
				+
			
 
				+# htlatex
			
 
				+*.4ct
			
 
				+*.4tc
			
 
				+*.idv
			
 
				+*.lg
			
 
				+*.trc
			
 
				+*.xref
			
 
				+
			
 
				+# hyperref
			
 
				+*.brf
			
 
				+
			
 
				+# knitr
			
 
				+*-concordance.tex
			
 
				+# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
			
 
				+# *.tikz
			
 
				+*-tikzDictionary
			
 
				+
			
 
				+# listings
			
 
				+*.lol
			
 
				+
			
 
				+# luatexja-ruby
			
 
				+*.ltjruby
			
 
				+
			
 
				+# makeidx
			
 
				+*.idx
			
 
				+*.ilg
			
 
				+*.ind
			
 
				+
			
 
				+# minitoc
			
 
				+*.maf
			
 
				+*.mlf
			
 
				+*.mlt
			
 
				+*.mtc[0-9]*
			
 
				+*.slf[0-9]*
			
 
				+*.slt[0-9]*
			
 
				+*.stc[0-9]*
			
 
				+
			
 
				+# minted
			
 
				+_minted*
			
 
				+*.pyg
			
 
				+
			
 
				+# morewrites
			
 
				+*.mw
			
 
				+
			
 
				+# newpax
			
 
				+*.newpax
			
 
				+
			
 
				+# nomencl
			
 
				+*.nlg
			
 
				+*.nlo
			
 
				+*.nls
			
 
				+
			
 
				+# pax
			
 
				+*.pax
			
 
				+
			
 
				+# pdfpcnotes
			
 
				+*.pdfpc
			
 
				+
			
 
				+# sagetex
			
 
				+*.sagetex.sage
			
 
				+*.sagetex.py
			
 
				+*.sagetex.scmd
			
 
				+
			
 
				+# scrwfile
			
 
				+*.wrt
			
 
				+
			
 
				+# svg
			
 
				+svg-inkscape/
			
 
				+
			
 
				+# sympy
			
 
				+*.sout
			
 
				+*.sympy
			
 
				+sympy-plots-for-*.tex/
			
 
				+
			
 
				+# pdfcomment
			
 
				+*.upa
			
 
				+*.upb
			
 
				+
			
 
				+# pythontex
			
 
				+*.pytxcode
			
 
				+pythontex-files-*/
			
 
				+
			
 
				+# tcolorbox
			
 
				+*.listing
			
 
				+
			
 
				+# thmtools
			
 
				+*.loe
			
 
				+
			
 
				+# TikZ & PGF
			
 
				+*.dpth
			
 
				+*.md5
			
 
				+*.auxlock
			
 
				+
			
 
				+# titletoc
			
 
				+*.ptc
			
 
				+
			
 
				+# todonotes
			
 
				+*.tdo
			
 
				+
			
 
				+# vhistory
			
 
				+*.hst
			
 
				+*.ver
			
 
				+
			
 
				+# easy-todo
			
 
				+*.lod
			
 
				+
			
 
				+# xcolor
			
 
				+*.xcp
			
 
				+
			
 
				+# xmpincl
			
 
				+*.xmpi
			
 
				+
			
 
				+# xindy
			
 
				+*.xdy
			
 
				+
			
 
				+# xypic precompiled matrices and outlines
			
 
				+*.xyc
			
 
				+*.xyd
			
 
				+
			
 
				+# endfloat
			
 
				+*.ttt
			
 
				+*.fff
			
 
				+
			
 
				+# Latexian
			
 
				+TSWLatexianTemp*
			
 
				+
			
 
				+## Editors:
			
 
				+# WinEdt
			
 
				+*.bak
			
 
				+*.sav
			
 
				+
			
 
				+# Texpad
			
 
				+.texpadtmp
			
 
				+
			
 
				+# LyX
			
 
				+*.lyx~
			
 
				+
			
 
				+# Kile
			
 
				+*.backup
			
 
				+
			
 
				+# gummi
			
 
				+.*.swp
			
 
				+
			
 
				+# KBibTeX
			
 
				+*~[0-9]*
			
 
				+
			
 
				+# TeXnicCenter
			
 
				+*.tps
			
 
				+
			
 
				+# auto folder when using emacs and auctex
			
 
				+# (gitignore patterns are matched against paths without a leading "./",
			
 
				+# so the former "./auto/*" rule never matched anything)
			
 
				+auto/
			
 
				+*.el
			
 
				+
			
 
				+# expex forward references with \gathertags
			
 
				+*-tags.tex
			
 
				+
			
 
				+# standalone packages
			
 
				+*.sta
			
 
				+
			
 
				+# Makeindex log files
			
 
				+*.lpz
			
 
				+
			
 
				+# xwatermark package
			
 
				+*.xwm
			
 
				+
			
 
				+# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
			
 
				+# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
			
 
				+# Uncomment the next line to have this generated file ignored.
			
 
				+#*Notes.bib
			
 
				+
			
 
				+### LaTeX Patch ###
			
 
				+# LIPIcs / OASIcs
			
 
				+*.vtc
			
 
				+
			
 
				+# glossaries
			
 
				+*.glstex
			
 
				+
			
 
				+### Python ###
			
 
				+# Byte-compiled / optimized / DLL files
			
 
				+__pycache__/
			
 
				+*.py[cod]
			
 
				+*$py.class
			
 
				+
			
 
				+# C extensions
			
 
				+*.so
			
 
				+
			
 
				+# Distribution / packaging
			
 
				+.Python
			
 
				+build/
			
 
				+develop-eggs/
			
 
				+dist/
			
 
				+downloads/
			
 
				+eggs/
			
 
				+.eggs/
			
 
				+lib/
			
 
				+lib64/
			
 
				+parts/
			
 
				+sdist/
			
 
				+var/
			
 
				+wheels/
			
 
				+share/python-wheels/
			
 
				+*.egg-info/
			
 
				+.installed.cfg
			
 
				+*.egg
			
 
				+MANIFEST
			
 
				+
			
 
				+# PyInstaller
			
 
				+#  Usually these files are written by a python script from a template
			
 
				+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
			
 
				+*.manifest
			
 
				+*.spec
			
 
				+
			
 
				+# Installer logs
			
 
				+pip-log.txt
			
 
				+pip-delete-this-directory.txt
			
 
				+
			
 
				+# Unit test / coverage reports
			
 
				+htmlcov/
			
 
				+.tox/
			
 
				+.nox/
			
 
				+.coverage
			
 
				+.coverage.*
			
 
				+.cache
			
 
				+nosetests.xml
			
 
				+coverage.xml
			
 
				+*.cover
			
 
				+*.py,cover
			
 
				+.hypothesis/
			
 
				+.pytest_cache/
			
 
				+cover/
			
 
				+
			
 
				+# Translations
			
 
				+*.mo
			
 
				+*.pot
			
 
				+
			
 
				+# Django stuff:
			
 
				+local_settings.py
			
 
				+db.sqlite3
			
 
				+db.sqlite3-journal
			
 
				+
			
 
				+# Flask stuff:
			
 
				+instance/
			
 
				+.webassets-cache
			
 
				+
			
 
				+# Scrapy stuff:
			
 
				+.scrapy
			
 
				+
			
 
				+# Sphinx documentation
			
 
				+docs/_build/
			
 
				+
			
 
				+# PyBuilder
			
 
				+.pybuilder/
			
 
				+target/
			
 
				+
			
 
				+# Jupyter Notebook
			
 
				+
			
 
				+# IPython
			
 
				+
			
 
				+# pyenv
			
 
				+#   For a library or package, you might want to ignore these files since the code is
			
 
				+#   intended to run in multiple environments; otherwise, check them in:
			
 
				+# .python-version
			
 
				+
			
 
				+# pipenv
			
 
				+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
			
 
				+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
			
 
				+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
			
 
				+#   install all needed dependencies.
			
 
				+#Pipfile.lock
			
 
				+
			
 
				+# poetry
			
 
				+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
			
 
				+#   This is especially recommended for binary packages to ensure reproducibility, and is more
			
 
				+#   commonly ignored for libraries.
			
 
				+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
			
 
				+#poetry.lock
			
 
				+
			
 
				+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
			
 
				+__pypackages__/
			
 
				+
			
 
				+# Celery stuff
			
 
				+celerybeat-schedule
			
 
				+celerybeat.pid
			
 
				+
			
 
				+# SageMath parsed files
			
 
				+*.sage.py
			
 
				+
			
 
				+# Environments
			
 
				+.env
			
 
				+.venv
			
 
				+env/
			
 
				+venv/
			
 
				+ENV/
			
 
				+env.bak/
			
 
				+venv.bak/
			
 
				+
			
 
				+# Spyder project settings
			
 
				+.spyderproject
			
 
				+.spyproject
			
 
				+
			
 
				+# Rope project settings
			
 
				+.ropeproject
			
 
				+
			
 
				+# mkdocs documentation
			
 
				+/site
			
 
				+
			
 
				+# mypy
			
 
				+.mypy_cache/
			
 
				+.dmypy.json
			
 
				+dmypy.json
			
 
				+
			
 
				+# Pyre type checker
			
 
				+.pyre/
			
 
				+
			
 
				+# pytype static type analyzer
			
 
				+.pytype/
			
 
				+
			
 
				+# Cython debug symbols
			
 
				+cython_debug/
			
 
				+
			
 
				+# PyCharm
			
 
				+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
			
 
				+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
			
 
				+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
			
 
				+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
			
 
				+#.idea/
			
 
				+
			
 
				+# End of https://www.toptal.com/developers/gitignore/api/latex,jupyternotebooks,python
			
 
				+
			
--- a/dat/title.basics.tsv.gz
+++ b/dat/title.basics.tsv.gz
 
				+oid sha256:f9454fae364a7848af28d763c84963a6fff6ff06d936e4f0fd8174834ed01441
			
 
				+size 151455252
			
--- a/dat/title.principals.tsv.gz
+++ b/dat/title.principals.tsv.gz
 
				+oid sha256:7516751fcd51991d13adc04f5b24291d0b46e65475dd4183df1ad4dee7ee273f
			
 
				+size 386712793
			
--- a/dat/title.ratings.tsv.gz
+++ b/dat/title.ratings.tsv.gz
 
				+oid sha256:7891c9b416445df2c72abe4bedf436abfa760623f8f6f9ad9a37b44bd40c45cb
			
 
				+size 6035379
			
--- a/doc/projectregistration2022/neurips_2021.sty
+++ b/doc/projectregistration2022/neurips_2021.sty
@@ -0,0 +1,377 @@
 
				+% partial rewrite of the LaTeX2e package for submissions to the
			
 
				+% Conference on Neural Information Processing Systems (NeurIPS):
			
 
				+%
			
 
				+% - uses more LaTeX conventions
			
 
				+% - line numbers at submission time replaced with aligned numbers from
			
 
				+%   lineno package
			
 
				+% - \nipsfinalcopy replaced with [final] package option
			
 
				+% - automatically loads times package for authors
			
 
				+% - loads natbib automatically; this can be suppressed with the
			
 
				+%   [nonatbib] package option
			
 
				+% - adds foot line to first page identifying the conference
			
 
				+% - adds preprint option for submission to e.g. arXiv
			
 
				+% - conference acronym modified
			
 
				+%
			
 
				+% Roman Garnett (garnett@wustl.edu) and the many authors of
			
 
				+% nips15submit_e.sty, including MK and drstrip@sandia
			
 
				+%
			
 
				+% last revision: March 2021
			
 
				+
			
 
				+\NeedsTeXFormat{LaTeX2e}
			
 
				+\ProvidesPackage{neurips_2021}[2021/03/31 NeurIPS 2021 submission/camera-ready style file]
			
 
				+
			
 
				+% declare final option, which creates camera-ready copy
			
 
				+\newif\if@neuripsfinal\@neuripsfinalfalse
			
 
				+\DeclareOption{final}{
			
 
				+  \@neuripsfinaltrue
			
 
				+}
			
 
				+
			
 
				+% declare nonatbib option, which does not load natbib in case of
			
 
				+% package clash (users can pass options to natbib via
			
 
				+% \PassOptionsToPackage)
			
 
				+\newif\if@natbib\@natbibtrue
			
 
				+\DeclareOption{nonatbib}{
			
 
				+  \@natbibfalse
			
 
				+}
			
 
				+
			
 
				+% declare preprint option, which creates a preprint version ready for
			
 
				+% upload to, e.g., arXiv
			
 
				+\newif\if@preprint\@preprintfalse
			
 
				+\DeclareOption{preprint}{
			
 
				+  \@preprinttrue
			
 
				+}
			
 
				+
			
 
				+\ProcessOptions\relax
			
 
				+
			
 
				+% determine whether this is an anonymized submission
			
 
				+\newif\if@submission\@submissiontrue
			
 
				+\if@neuripsfinal\@submissionfalse\fi
			
 
				+\if@preprint\@submissionfalse\fi
			
 
				+
			
 
				+% fonts
			
 
				+\renewcommand{\rmdefault}{ptm}
			
 
				+\renewcommand{\sfdefault}{phv}
			
 
				+
			
 
				+% change this every year for notice string at bottom
			
 
				+\newcommand{\@neuripsordinal}{35th}
			
 
				+\newcommand{\@neuripsyear}{2021}
			
 
				+\newcommand{\@neuripslocation}{virtual}
			
 
				+
			
 
				+% acknowledgments
			
 
				+% environ provides \NewEnviron and \BODY; it is loaded with \RequirePackage,
			
 
				+% the package-level loader this file already uses for lineno and natbib
			
 
				+\RequirePackage{environ}
			
 
				+% unnumbered heading printed at the start of the acknowledgments
			
 
				+\newcommand{\acksection}{\section*{Acknowledgments and Disclosure of Funding}}
			
 
				+% ack environment: heading followed by the environment body; the anonymized
			
 
				+% submission branch below re-points \ack/\endack at an environment that
			
 
				+% discards its body
			
 
				+\NewEnviron{ack}{%
			
 
				+  \acksection
			
 
				+  \BODY
			
 
				+}
			
 
				+
			
 
				+% handle tweaks for camera-ready copy vs. submission copy
			
 
				+\if@preprint
			
 
				+  \newcommand{\@noticestring}{%
			
 
				+    Project Report for \emph{Data Literacy} 2021/22
			
 
				+  }
			
 
				+\else
			
 
				+  \if@neuripsfinal
			
 
				+    \newcommand{\@noticestring}{%
			
 
				+      \@neuripsordinal\/ Conference on Neural Information Processing Systems
			
 
				+      (NeurIPS \@neuripsyear).%, \@neuripslocation.%
			
 
				+    }
			
 
				+  \else
			
 
				+    \newcommand{\@noticestring}{%
			
 
				+      Submitted to \@neuripsordinal\/ Conference on Neural Information
			
 
				+      Processing Systems (NeurIPS \@neuripsyear). Do not distribute.%
			
 
				+    }
			
 
				+
			
 
				+    % hide the acknowledgements
			
 
				+    \NewEnviron{hide}{}
			
 
				+    \let\ack\hide
			
 
				+    \let\endack\endhide
			
 
				+
			
 
				+    % line numbers for submission
			
 
				+    \RequirePackage{lineno}
			
 
				+    \linenumbers
			
 
				+
			
 
				+    % fix incompatibilities between lineno and amsmath, if required, by
			
 
				+    % transparently wrapping linenomath environments around amsmath
			
 
				+    % environments
			
 
				+    \AtBeginDocument{%
			
 
				+      \@ifpackageloaded{amsmath}{%
			
 
				+        \newcommand*\patchAmsMathEnvironmentForLineno[1]{%
			
 
				+          \expandafter\let\csname old#1\expandafter\endcsname\csname #1\endcsname
			
 
				+          \expandafter\let\csname oldend#1\expandafter\endcsname\csname end#1\endcsname
			
 
				+          \renewenvironment{#1}%
			
 
				+                           {\linenomath\csname old#1\endcsname}%
			
 
				+                           {\csname oldend#1\endcsname\endlinenomath}%
			
 
				+        }%
			
 
				+        \newcommand*\patchBothAmsMathEnvironmentsForLineno[1]{%
			
 
				+          \patchAmsMathEnvironmentForLineno{#1}%
			
 
				+          \patchAmsMathEnvironmentForLineno{#1*}%
			
 
				+        }%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{equation}%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{align}%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{flalign}%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{alignat}%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{gather}%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{multline}%
			
 
				+      }{}
			
 
				+    }
			
 
				+  \fi
			
 
				+\fi
			
 
				+
			
 
				+% load natbib unless told otherwise
			
 
				+\if@natbib
			
 
				+  \RequirePackage{natbib}
			
 
				+\fi
			
 
				+
			
 
				+% set page geometry
			
 
				+% geometry is loaded with \RequirePackage, the package-level loader this
			
 
				+% file already uses for lineno and natbib
			
 
				+\RequirePackage[verbose=true,letterpaper]{geometry}
			
 
				+% the text block is set at \begin{document} via \newgeometry, i.e. after the
			
 
				+% whole preamble has been processed
			
 
				+\AtBeginDocument{
			
 
				+  \newgeometry{
			
 
				+    textheight=9in,
			
 
				+    textwidth=5.5in,
			
 
				+    top=1in,
			
 
				+    headheight=12pt,
			
 
				+    headsep=25pt,
			
 
				+    footskip=30pt
			
 
				+  }
			
 
				+  % only warns when fullpage was loaded; the layout above is applied regardless
			
 
				+  \@ifpackageloaded{fullpage}
			
 
				+    {\PackageWarning{neurips_2021}{fullpage package not allowed! Overwriting formatting.}}
			
 
				+    {}
			
 
				+}
			
 
				+
			
 
				+\widowpenalty=10000
			
 
				+\clubpenalty=10000
			
 
				+\flushbottom
			
 
				+\sloppy
			
 
				+
			
 
				+% font sizes with reduced leading
			
 
				+\renewcommand{\normalsize}{%
			
 
				+  \@setfontsize\normalsize\@xpt\@xipt
			
 
				+  \abovedisplayskip      7\p@ \@plus 2\p@ \@minus 5\p@
			
 
				+  \abovedisplayshortskip \z@ \@plus 3\p@
			
 
				+  \belowdisplayskip      \abovedisplayskip
			
 
				+  \belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@
			
 
				+}
			
 
				+\normalsize
			
 
				+\renewcommand{\small}{%
			
 
				+  \@setfontsize\small\@ixpt\@xpt
			
 
				+  \abovedisplayskip      6\p@ \@plus 1.5\p@ \@minus 4\p@
			
 
				+  \abovedisplayshortskip \z@  \@plus 2\p@
			
 
				+  \belowdisplayskip      \abovedisplayskip
			
 
				+  \belowdisplayshortskip 3\p@ \@plus 2\p@   \@minus 2\p@
			
 
				+}
			
 
				+\renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt}
			
 
				+\renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt}
			
 
				+\renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt}
			
 
				+\renewcommand{\large}{\@setfontsize\large\@xiipt{14}}
			
 
				+\renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}}
			
 
				+\renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}}
			
 
				+\renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}}
			
 
				+\renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}}
			
 
				+
			
 
				+% sections with less space
			
 
				+\providecommand{\section}{}
			
 
				+\renewcommand{\section}{%
			
 
				+  \@startsection{section}{1}{\z@}%
			
 
				+                {-2.0ex \@plus -0.5ex \@minus -0.2ex}%
			
 
				+                { 1.5ex \@plus  0.3ex \@minus  0.2ex}%
			
 
				+                {\large\bf\raggedright}%
			
 
				+}
			
 
				+\providecommand{\subsection}{}
			
 
				+\renewcommand{\subsection}{%
			
 
				+  \@startsection{subsection}{2}{\z@}%
			
 
				+                {-1.8ex \@plus -0.5ex \@minus -0.2ex}%
			
 
				+                { 0.8ex \@plus  0.2ex}%
			
 
				+                {\normalsize\bf\raggedright}%
			
 
				+}
			
 
				+\providecommand{\subsubsection}{}
			
 
				+\renewcommand{\subsubsection}{%
			
 
				+  \@startsection{subsubsection}{3}{\z@}%
			
 
				+                {-1.5ex \@plus -0.5ex \@minus -0.2ex}%
			
 
				+                { 0.5ex \@plus  0.2ex}%
			
 
				+                {\normalsize\bf\raggedright}%
			
 
				+}
			
 
				+\providecommand{\paragraph}{}
			
 
				+\renewcommand{\paragraph}{%
			
 
				+  \@startsection{paragraph}{4}{\z@}%
			
 
				+                {1.5ex \@plus 0.5ex \@minus 0.2ex}%
			
 
				+                {-1em}%
			
 
				+                {\normalsize\bf}%
			
 
				+}
			
 
				+\providecommand{\subparagraph}{}
			
 
				+\renewcommand{\subparagraph}{%
			
 
				+  \@startsection{subparagraph}{5}{\z@}%
			
 
				+                {1.5ex \@plus 0.5ex \@minus 0.2ex}%
			
 
				+                {-1em}%
			
 
				+                {\normalsize\bf}%
			
 
				+}
			
 
				+\providecommand{\subsubsubsection}{}
			
 
				+\renewcommand{\subsubsubsection}{%
			
 
				+  \vskip5pt{\noindent\normalsize\rm\raggedright}%
			
 
				+}
			
 
				+
			
 
				+% float placement
			
 
				+\renewcommand{\topfraction      }{0.85}
			
 
				+\renewcommand{\bottomfraction   }{0.4}
			
 
				+\renewcommand{\textfraction     }{0.1}
			
 
				+\renewcommand{\floatpagefraction}{0.7}
			
 
				+
			
 
				+\newlength{\@neuripsabovecaptionskip}\setlength{\@neuripsabovecaptionskip}{7\p@}
			
 
				+\newlength{\@neuripsbelowcaptionskip}\setlength{\@neuripsbelowcaptionskip}{\z@}
			
 
				+
			
 
				+\setlength{\abovecaptionskip}{\@neuripsabovecaptionskip}
			
 
				+\setlength{\belowcaptionskip}{\@neuripsbelowcaptionskip}
			
 
				+
			
 
				+% swap above/belowcaptionskip lengths for tables
			
 
				+\renewenvironment{table}
			
 
				+  {\setlength{\abovecaptionskip}{\@neuripsbelowcaptionskip}%
			
 
				+   \setlength{\belowcaptionskip}{\@neuripsabovecaptionskip}%
			
 
				+   \@float{table}}
			
 
				+  {\end@float}
			
 
				+
			
 
				+% footnote formatting
			
 
				+\setlength{\footnotesep }{6.65\p@}
			
 
				+\setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@}
			
 
				+\renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@}
			
 
				+\setcounter{footnote}{0}
			
 
				+
			
 
				+% paragraph formatting
			
 
				+\setlength{\parindent}{\z@}
			
 
				+\setlength{\parskip  }{5.5\p@}
			
 
				+
			
 
				+% list formatting
			
 
				+\setlength{\topsep       }{4\p@ \@plus 1\p@   \@minus 2\p@}
			
 
				+\setlength{\partopsep    }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@}
			
 
				+\setlength{\itemsep      }{2\p@ \@plus 1\p@   \@minus 0.5\p@}
			
 
				+\setlength{\parsep       }{2\p@ \@plus 1\p@   \@minus 0.5\p@}
			
 
				+\setlength{\leftmargin   }{3pc}
			
 
				+\setlength{\leftmargini  }{\leftmargin}
			
 
				+\setlength{\leftmarginii }{2em}
			
 
				+\setlength{\leftmarginiii}{1.5em}
			
 
				+\setlength{\leftmarginiv }{1.0em}
			
 
				+\setlength{\leftmarginv  }{0.5em}
			
 
				+\def\@listi  {\leftmargin\leftmargini}
			
 
				+\def\@listii {\leftmargin\leftmarginii
			
 
				+              \labelwidth\leftmarginii
			
 
				+              \advance\labelwidth-\labelsep
			
 
				+              \topsep  2\p@ \@plus 1\p@    \@minus 0.5\p@
			
 
				+              \parsep  1\p@ \@plus 0.5\p@ \@minus 0.5\p@
			
 
				+              \itemsep \parsep}
			
 
				+\def\@listiii{\leftmargin\leftmarginiii
			
 
				+              \labelwidth\leftmarginiii
			
 
				+              \advance\labelwidth-\labelsep
			
 
				+              \topsep    1\p@ \@plus 0.5\p@ \@minus 0.5\p@
			
 
				+              \parsep    \z@
			
 
				+              \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@
			
 
				+              \itemsep \topsep}
			
 
				+\def\@listiv {\leftmargin\leftmarginiv
			
 
				+              \labelwidth\leftmarginiv
			
 
				+              \advance\labelwidth-\labelsep}
			
 
				+\def\@listv  {\leftmargin\leftmarginv
			
 
				+              \labelwidth\leftmarginv
			
 
				+              \advance\labelwidth-\labelsep}
			
 
				+\def\@listvi {\leftmargin\leftmarginvi
			
 
				+              \labelwidth\leftmarginvi
			
 
				+              \advance\labelwidth-\labelsep}
			
 
				+
			
 
				+% create title
			
 
				+\providecommand{\maketitle}{}
			
 
				+\renewcommand{\maketitle}{%
			
 
				+  \par
			
 
				+  \begingroup
			
 
				+    \renewcommand{\thefootnote}{\fnsymbol{footnote}}
			
 
				+    % for perfect author name centering
			
 
				+    \renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}}
			
 
				+    % The footnote-mark was overlapping the footnote-text,
			
 
				+    % added the following to fix this problem               (MK)
			
 
				+    \long\def\@makefntext##1{%
			
 
				+      \parindent 1em\noindent
			
 
				+      \hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1
			
 
				+    }
			
 
				+    \thispagestyle{empty}
			
 
				+    \@maketitle
			
 
				+    \@thanks
			
 
				+    \@notice
			
 
				+  \endgroup
			
 
				+  \let\maketitle\relax
			
 
				+  \let\thanks\relax
			
 
				+}
			
 
				+
			
 
				+% rules for title box at top of first page
			
 
				+\newcommand{\@toptitlebar}{
			
 
				+  \hrule height 4\p@
			
 
				+  \vskip 0.25in
			
 
				+  \vskip -\parskip%
			
 
				+}
			
 
				+\newcommand{\@bottomtitlebar}{
			
 
				+  \vskip 0.29in
			
 
				+  \vskip -\parskip
			
 
				+  \hrule height 1\p@
			
 
				+  \vskip 0.09in%
			
 
				+}
			
 
				+
			
 
				+% create title (includes both anonymized and non-anonymized versions)
			
 
				+\providecommand{\@maketitle}{}
			
 
				+\renewcommand{\@maketitle}{%
			
 
				+  \vbox{%
			
 
				+    \hsize\textwidth
			
 
				+    \linewidth\hsize
			
 
				+    \vskip 0.1in
			
 
				+    \@toptitlebar
			
 
				+    \centering
			
 
				+    {\LARGE\bf \@title\par}
			
 
				+    \@bottomtitlebar
			
 
				+    \if@submission
			
 
				+      \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}
			
 
				+        Anonymous Author(s) \\
			
 
				+        Affiliation \\
			
 
				+        Address \\
			
 
				+        \texttt{email} \\
			
 
				+      \end{tabular}%
			
 
				+    \else
			
 
				+      \def\And{%
			
 
				+        \end{tabular}\hfil\linebreak[0]\hfil%
			
 
				+        \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
			
 
				+      }
			
 
				+      \def\AND{%
			
 
				+        \end{tabular}\hfil\linebreak[4]\hfil%
			
 
				+        \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
			
 
				+      }
			
 
				+      \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}%
			
 
				+    \fi
			
 
				+    \vskip 0.3in \@minus 0.1in
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+% add conference notice to bottom of first page
			
 
				+\newcommand{\ftype@noticebox}{8}
			
 
				+\newcommand{\@notice}{%
			
 
				+  % give a bit of extra room back to authors on first page
			
 
				+  \enlargethispage{2\baselineskip}%
			
 
				+  \@float{noticebox}[b]%
			
 
				+    \footnotesize\@noticestring%
			
 
				+  \end@float%
			
 
				+}
			
 
				+
			
 
				+% abstract styling
			
 
				+\renewenvironment{abstract}%
			
 
				+{%
			
 
				+  \vskip 0.075in%
			
 
				+  \centerline%
			
 
				+  {\large\bf Abstract}%
			
 
				+  \vspace{0.5ex}%
			
 
				+  \begin{quote}%
			
 
				+}
			
 
				+{
			
 
				+  \par%
			
 
				+  \end{quote}%
			
 
				+  \vskip 1ex%
			
 
				+}
			
 
				+
			
 
				+% For the paper checklist
			
 
				+% Each macro takes one optional argument (default: empty) and prints a
			
 
				+% colored tag; \answerYes, \answerNo and \answerNA print the argument after
			
 
				+% the tag, while \answerTODO accepts the argument but does not print it.
			
 
				+% NOTE(review): \textcolor is used here, but this file never loads color or
			
 
				+% xcolor itself -- presumably the document is expected to load xcolor;
			
 
				+% confirm before using these macros in a document that does not.
			
 
				+\newcommand{\answerYes}[1][]{\textcolor{blue}{[Yes] #1}}
			
 
				+\newcommand{\answerNo}[1][]{\textcolor{orange}{[No] #1}}
			
 
				+\newcommand{\answerNA}[1][]{\textcolor{gray}{[N/A] #1}}
			
 
				+\newcommand{\answerTODO}[1][]{\textcolor{red}{\bf [TODO]}}
			
 
				+
			
 
				+\endinput
			
--- a/doc/projectregistration2022/neurips_2021.tex
+++ b/doc/projectregistration2022/neurips_2021.tex
@@ -0,0 +1,491 @@
 
				+\documentclass{article}
			
 
				+
			
 
				+% if you need to pass options to natbib, use, e.g.:
			
 
				+%     \PassOptionsToPackage{numbers, compress}{natbib}
			
 
				+% before loading neurips_2021
			
 
				+
			
 
				+% preprint (non-anonymized) version; drop the option for submission
			
 
				+\usepackage[preprint]{neurips_2021}
			
 
				+
			
 
				+% to compile a preprint version, e.g., for submission to arXiv, add the
			
 
				+% [preprint] option:
			
 
				+%     \usepackage[preprint]{neurips_2021}
			
 
				+
			
 
				+% to compile a camera-ready version, add the [final] option, e.g.:
			
 
				+%     \usepackage[final]{neurips_2021}
			
 
				+
			
 
				+% to avoid loading the natbib package, add option nonatbib:
			
 
				+%    \usepackage[nonatbib]{neurips_2021}
			
 
				+
			
 
				+\usepackage[utf8]{inputenc} % allow utf-8 input
			
 
				+\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
			
 
				+\usepackage{hyperref}       % hyperlinks
			
 
				+\usepackage{url}            % simple URL typesetting
			
 
				+\usepackage{booktabs}       % professional-quality tables
			
 
				+\usepackage{amsfonts}       % blackboard math symbols
			
 
				+\usepackage{nicefrac}       % compact symbols for 1/2, etc.
			
 
				+\usepackage{microtype}      % microtypography
			
 
				+\usepackage{xcolor}         % colors
			
 
				+
			
 
				+\title{Formatting Instructions For NeurIPS 2021}
			
 
				+
			
 
				+% The \author macro works with any number of authors. There are two commands
			
 
				+% used to separate the names and addresses of multiple authors: \And and \AND.
			
 
				+%
			
 
				+% Using \And between authors leaves it to LaTeX to determine where to break the
			
 
				+% lines. Using \AND forces a line break at that point. So, if LaTeX puts 3 of 4
			
 
				+% authors names on the first line, and the last on the second line, try using
			
 
				+% \AND instead of \And before the third author name.
			
 
				+
			
 
				+\author{%
			
 
				+  David S.~Hippocampus\thanks{Use footnote for providing further information
			
 
				+    about author (webpage, alternative address)---\emph{not} for acknowledging
			
 
				+    funding agencies.} \\
			
 
				+  Department of Computer Science\\
			
 
				+  Cranberry-Lemon University\\
			
 
				+  Pittsburgh, PA 15213 \\
			
 
				+  \texttt{hippo@cs.cranberry-lemon.edu} \\
			
 
				+  % examples of more authors
			
 
				+  % \And
			
 
				+  % Coauthor \\
			
 
				+  % Affiliation \\
			
 
				+  % Address \\
			
 
				+  % \texttt{email} \\
			
 
				+  % \AND
			
 
				+  % Coauthor \\
			
 
				+  % Affiliation \\
			
 
				+  % Address \\
			
 
				+  % \texttt{email} \\
			
 
				+  % \And
			
 
				+  % Coauthor \\
			
 
				+  % Affiliation \\
			
 
				+  % Address \\
			
 
				+  % \texttt{email} \\
			
 
				+  % \And
			
 
				+  % Coauthor \\
			
 
				+  % Affiliation \\
			
 
				+  % Address \\
			
 
				+  % \texttt{email} \\
			
 
				+}
			
 
				+
			
 
				+\begin{document}
			
 
				+
			
 
				+\maketitle
			
 
				+
			
 
				+\begin{abstract}
			
 
				+  The abstract paragraph should be indented \nicefrac{1}{2}~inch (3~picas) on
			
 
				+  both the left- and right-hand margins. Use 10~point type, with a vertical
			
 
				+  spacing (leading) of 11~points.  The word \textbf{Abstract} must be centered,
			
 
				+  bold, and in point size 12. Two line spaces precede the abstract. The abstract
			
 
				+  must be limited to one paragraph.
			
 
				+\end{abstract}
			
 
				+
			
 
				+\section{Submission of papers to NeurIPS 2021}
			
 
				+
			
 
				+Please read the instructions below carefully and follow them faithfully.
			
 
				+
			
 
				+\subsection{Style}
			
 
				+
			
 
				+Papers to be submitted to NeurIPS 2021 must be prepared according to the
			
 
				+instructions presented here. Papers may only be up to {\bf nine} pages long,
			
 
				+including figures. Additional pages \emph{containing only acknowledgments and
			
 
				+references} are allowed. Papers that exceed the page limit will not be
			
 
				+reviewed, or in any other way considered for presentation at the conference.
			
 
				+
			
 
				+The margins in 2021 are the same as those in 2007, which allow for $\sim$$15\%$
			
 
				+more words in the paper compared to earlier years.
			
 
				+
			
 
				+Authors are required to use the NeurIPS \LaTeX{} style files obtainable at the
			
 
				+NeurIPS website as indicated below. Please make sure you use the current files
			
 
				+and not previous versions. Tweaking the style files may be grounds for
			
 
				+rejection.
			
 
				+
			
 
				+\subsection{Retrieval of style files}
			
 
				+
			
 
				+The style files for NeurIPS and other conference information are available on
			
 
				+the World Wide Web at
			
 
				+\begin{center}
			
 
				+  \url{http://www.neurips.cc/}
			
 
				+\end{center}
			
 
				+The file \verb+neurips_2021.pdf+ contains these instructions and illustrates the
			
 
				+various formatting requirements your NeurIPS paper must satisfy.
			
 
				+
			
 
				+The only supported style file for NeurIPS 2021 is \verb+neurips_2021.sty+,
			
 
				+rewritten for \LaTeXe{}.  \textbf{Previous style files for \LaTeX{} 2.09,
			
 
				+  Microsoft Word, and RTF are no longer supported!}
			
 
				+
			
 
				+The \LaTeX{} style file contains three optional arguments: \verb+final+, which
			
 
				+creates a camera-ready copy, \verb+preprint+, which creates a preprint for
			
 
				+submission to, e.g., arXiv, and \verb+nonatbib+, which will not load the
			
 
				+\verb+natbib+ package for you in case of package clash.
			
 
				+
			
 
				+\paragraph{Preprint option}
			
 
				+If you wish to post a preprint of your work online, e.g., on arXiv, using the
			
 
				+NeurIPS style, please use the \verb+preprint+ option. This will create a
			
 
				+nonanonymized version of your work with the text ``Preprint. Work in progress.''
			
 
				+in the footer. This version may be distributed as you see fit. Please \textbf{do
			
 
				+  not} use the \verb+final+ option, which should \textbf{only} be used for
			
 
				+papers accepted to NeurIPS.
			
 
				+
			
 
				+At submission time, please omit the \verb+final+ and \verb+preprint+
			
 
				+options. This will anonymize your submission and add line numbers to aid
			
 
				+review. Please do \emph{not} refer to these line numbers in your paper as they
			
 
				+will be removed during generation of camera-ready copies.
			
 
				+
			
 
				+The file \verb+neurips_2021.tex+ may be used as a ``shell'' for writing your
			
 
				+paper. All you have to do is replace the author, title, abstract, and text of
			
 
				+the paper with your own.
			
 
				+
			
 
				+The formatting instructions contained in these style files are summarized in
			
 
				+Sections \ref{gen_inst}, \ref{headings}, and \ref{others} below.
			
 
				+
			
 
				+\section{General formatting instructions}
			
 
				+\label{gen_inst}
			
 
				+
			
 
				+The text must be confined within a rectangle 5.5~inches (33~picas) wide and
			
 
				+9~inches (54~picas) long. The left margin is 1.5~inch (9~picas).  Use 10~point
			
 
				+type with a vertical spacing (leading) of 11~points.  Times New Roman is the
			
 
				+preferred typeface throughout, and will be selected for you by default.
			
 
				+Paragraphs are separated by \nicefrac{1}{2}~line space (5.5 points), with no
			
 
				+indentation.
			
 
				+
			
 
				+The paper title should be 17~point, initial caps/lower case, bold, centered
			
 
				+between two horizontal rules. The top rule should be 4~points thick and the
			
 
				+bottom rule should be 1~point thick. Allow \nicefrac{1}{4}~inch space above and
			
 
				+below the title to rules. All pages should start at 1~inch (6~picas) from the
			
 
				+top of the page.
			
 
				+
			
 
				+For the final version, authors' names are set in boldface, and each name is
			
 
				+centered above the corresponding address. The lead author's name is to be listed
			
 
				+first (left-most), and the co-authors' names (if different address) are set to
			
 
				+follow. If there is only one co-author, list both author and co-author side by
			
 
				+side.
			
 
				+
			
 
				+Please pay special attention to the instructions in Section \ref{others}
			
 
				+regarding figures, tables, acknowledgments, and references.
			
 
				+
			
 
				+\section{Headings: first level}
			
 
				+\label{headings}
			
 
				+
			
 
				+All headings should be lower case (except for first word and proper nouns),
			
 
				+flush left, and bold.
			
 
				+
			
 
				+First-level headings should be in 12-point type.
			
 
				+
			
 
				+\subsection{Headings: second level}
			
 
				+
			
 
				+Second-level headings should be in 10-point type.
			
 
				+
			
 
				+\subsubsection{Headings: third level}
			
 
				+
			
 
				+Third-level headings should be in 10-point type.
			
 
				+
			
 
				+\paragraph{Paragraphs}
			
 
				+
			
 
				+There is also a \verb+\paragraph+ command available, which sets the heading in
			
 
				+bold, flush left, and inline with the text, with the heading followed by 1\,em
			
 
				+of space.
			
 
				+
			
 
				+\section{Citations, figures, tables, references}
			
 
				+\label{others}
			
 
				+
			
 
				+These instructions apply to everyone.
			
 
				+
			
 
				+\subsection{Citations within the text}
			
 
				+
			
 
				+The \verb+natbib+ package will be loaded for you by default.  Citations may be
			
 
				+author/year or numeric, as long as you maintain internal consistency.  As to the
			
 
				+format of the references themselves, any style is acceptable as long as it is
			
 
				+used consistently.
			
 
				+
			
 
				+The documentation for \verb+natbib+ may be found at
			
 
				+\begin{center}
			
 
				+  \url{http://mirrors.ctan.org/macros/latex/contrib/natbib/natnotes.pdf}
			
 
				+\end{center}
			
 
				+Of note is the command \verb+\citet+, which produces citations appropriate for
			
 
				+use in inline text.  For example,
			
 
				+\begin{verbatim}
			
 
				+   \citet{hasselmo} investigated\dots
			
 
				+\end{verbatim}
			
 
				+produces
			
 
				+\begin{quote}
			
 
				+  Hasselmo, et al.\ (1995) investigated\dots
			
 
				+\end{quote}
			
 
				+
			
 
				+If you wish to load the \verb+natbib+ package with options, you may add the
			
 
				+following before loading the \verb+neurips_2021+ package:
			
 
				+\begin{verbatim}
			
 
				+   \PassOptionsToPackage{options}{natbib}
			
 
				+\end{verbatim}
			
 
				+
			
 
				+If \verb+natbib+ clashes with another package you load, you can add the optional
			
 
				+argument \verb+nonatbib+ when loading the style file:
			
 
				+\begin{verbatim}
			
 
				+   \usepackage[nonatbib]{neurips_2021}
			
 
				+\end{verbatim}
			
 
				+
			
 
				+As submission is double blind, refer to your own published work in the third
			
 
				+person. That is, use ``In the previous work of Jones et al.\ [4],'' not ``In our
			
 
				+previous work [4].'' If you cite your other papers that are not widely available
			
 
				+(e.g., a journal paper under review), use anonymous author names in the
			
 
				+citation, e.g., an author of the form ``A.\ Anonymous.''
			
 
				+
			
 
				+\subsection{Footnotes}
			
 
				+
			
 
				+Footnotes should be used sparingly.  If you do require a footnote, indicate
			
 
				+footnotes with a number\footnote{Sample of the first footnote.} in the
			
 
				+text. Place the footnotes at the bottom of the page on which they appear.
			
 
				+Precede the footnote with a horizontal rule of 2~inches (12~picas).
			
 
				+
			
 
				+Note that footnotes are properly typeset \emph{after} punctuation
			
 
				+marks.\footnote{As in this example.}
			
 
				+
			
 
				+\subsection{Figures}
			
 
				+
			
 
				+\begin{figure}
			
 
				+  \centering
			
 
				+  \fbox{\rule[-.5cm]{0cm}{4cm} \rule[-.5cm]{4cm}{0cm}}
			
 
				+  \caption{Sample figure caption.}
			
 
				+\end{figure}
			
 
				+
			
 
				+All artwork must be neat, clean, and legible. Lines should be dark enough for
			
 
				+purposes of reproduction. The figure number and caption always appear after the
			
 
				+figure. Place one line space before the figure caption and one line space after
			
 
				+the figure. The figure caption should be lower case (except for first word and
			
 
				+proper nouns); figures are numbered consecutively.
			
 
				+
			
 
				+You may use color figures.  However, it is best for the figure captions and the
			
 
				+paper body to be legible if the paper is printed in either black/white or in
			
 
				+color.
			
 
				+
			
 
				+\subsection{Tables}
			
 
				+
			
 
				+All tables must be centered, neat, clean and legible.  The table number and
			
 
				+title always appear before the table.  See Table~\ref{sample-table}.
			
 
				+
			
 
				+Place one line space before the table title, one line space after the
			
 
				+table title, and one line space after the table. The table title must
			
 
				+be lower case (except for first word and proper nouns); tables are
			
 
				+numbered consecutively.
			
 
				+
			
 
				+Note that publication-quality tables \emph{do not contain vertical rules.} We
			
 
				+strongly suggest the use of the \verb+booktabs+ package, which allows for
			
 
				+typesetting high-quality, professional tables:
			
 
				+\begin{center}
			
 
				+  \url{https://www.ctan.org/pkg/booktabs}
			
 
				+\end{center}
			
 
				+This package was used to typeset Table~\ref{sample-table}.
			
 
				+
			
 
				+\begin{table}
			
 
				+  \caption{Sample table title}
			
 
				+  \label{sample-table}
			
 
				+  \centering
			
 
				+  \begin{tabular}{lll}
			
 
				+    \toprule
			
 
				+    \multicolumn{2}{c}{Part}                   \\
			
 
				+    \cmidrule(r){1-2}
			
 
				+    Name     & Description     & Size ($\mu$m) \\
			
 
				+    \midrule
			
 
				+    Dendrite & Input terminal  & $\sim$100     \\
			
 
				+    Axon     & Output terminal & $\sim$10      \\
			
 
				+    Soma     & Cell body       & up to $10^6$  \\
			
 
				+    \bottomrule
			
 
				+  \end{tabular}
			
 
				+\end{table}
			
 
				+
			
 
				+\section{Final instructions}
			
 
				+
			
 
				+Do not change any aspects of the formatting parameters in the style files.  In
			
 
				+particular, do not modify the width or length of the rectangle the text should
			
 
				+fit into, and do not change font sizes (except perhaps in the
			
 
				+\textbf{References} section; see below). Please note that pages should be
			
 
				+numbered.
			
 
				+
			
 
				+\section{Preparing PDF files}
			
 
				+
			
 
				+Please prepare submission files with paper size ``US Letter,'' and not, for
			
 
				+example, ``A4.''
			
 
				+
			
 
				+Fonts were the main cause of problems in the past years. Your PDF file must only
			
 
				+contain Type 1 or Embedded TrueType fonts. Here are a few instructions to
			
 
				+achieve this.
			
 
				+
			
 
				+\begin{itemize}
			
 
				+
			
 
				+\item You should directly generate PDF files using \verb+pdflatex+.
			
 
				+
			
 
				+\item You can check which fonts a PDF file uses.  In Acrobat Reader, select the
			
 
				+  menu Files$>$Document Properties$>$Fonts and select Show All Fonts. You can
			
 
				+  also use the program \verb+pdffonts+ which comes with \verb+xpdf+ and is
			
 
				+  available out-of-the-box on most Linux machines.
			
 
				+
			
 
				+\item The IEEE has recommendations for generating PDF files whose fonts are also
			
 
				+  acceptable for NeurIPS. Please see
			
 
				+  \url{http://www.emfield.org/icuwb2010/downloads/IEEE-PDF-SpecV32.pdf}
			
 
				+
			
 
				+\item \verb+xfig+ "patterned" shapes are implemented with bitmap fonts.  Use
			
 
				+  "solid" shapes instead.
			
 
				+
			
 
				+\item The \verb+\bbold+ package almost always uses bitmap fonts.  You should use
			
 
				+  the equivalent AMS Fonts:
			
 
				+\begin{verbatim}
			
 
				+   \usepackage{amsfonts}
			
 
				+\end{verbatim}
			
 
				+followed by, e.g., \verb+\mathbb{R}+, \verb+\mathbb{N}+, or \verb+\mathbb{C}+
			
 
				+for $\mathbb{R}$, $\mathbb{N}$ or $\mathbb{C}$.  You can also use the following
			
 
				+workaround for reals, natural and complex:
			
 
				+\begin{verbatim}
			
 
				+   \newcommand{\RR}{I\!\!R} %real numbers
			
 
				+   \newcommand{\Nat}{I\!\!N} %natural numbers
			
 
				+   \newcommand{\CC}{I\!\!\!\!C} %complex numbers
			
 
				+\end{verbatim}
			
 
				+Note that \verb+amsfonts+ is automatically loaded by the \verb+amssymb+ package.
			
 
				+
			
 
				+\end{itemize}
			
 
				+
			
 
				+If your file contains type 3 fonts or non embedded TrueType fonts, we will ask
			
 
				+you to fix it.
			
 
				+
			
 
				+\subsection{Margins in \LaTeX{}}
			
 
				+
			
 
				+Most of the margin problems come from figures positioned by hand using
			
 
				+\verb+\special+ or other commands. We suggest using the command
			
 
				+\verb+\includegraphics+ from the \verb+graphicx+ package. Always specify the
			
 
				+figure width as a multiple of the line width as in the example below:
			
 
				+\begin{verbatim}
			
 
				+   \usepackage[pdftex]{graphicx} ...
			
 
				+   \includegraphics[width=0.8\linewidth]{myfile.pdf}
			
 
				+\end{verbatim}
			
 
				+See Section 4.4 in the graphics bundle documentation
			
 
				+(\url{http://mirrors.ctan.org/macros/latex/required/graphics/grfguide.pdf}).
			
 
				+
			
 
				+A number of width problems arise when \LaTeX{} cannot properly hyphenate a
			
 
				+line. Please give \LaTeX{} hyphenation hints using the \verb+\-+ command when
			
 
				+necessary.
			
 
				+
			
 
				+\begin{ack}
			
 
				+Use unnumbered first level headings for the acknowledgments. All acknowledgments
			
 
				+go at the end of the paper before the list of references. Moreover, you are required to declare
			
 
				+funding (financial activities supporting the submitted work) and competing interests (related financial activities outside the submitted work).
			
 
				+More information about this disclosure can be found at: \url{https://neurips.cc/Conferences/2021/PaperInformation/FundingDisclosure}.
			
 
				+
			
 
				+Do {\bf not} include this section in the anonymized submission, only in the final paper. You can use the \texttt{ack} environment provided in the style file to automatically hide this section in the anonymized submission.
			
 
				+\end{ack}
			
 
				+
			
 
				+\section*{References}
			
 
				+
			
 
				+References follow the acknowledgments. Use unnumbered first-level heading for
			
 
				+the references. Any choice of citation style is acceptable as long as you are
			
 
				+consistent. It is permissible to reduce the font size to \verb+small+ (9 point)
			
 
				+when listing the references.
			
 
				+Note that the Reference section does not count towards the page limit.
			
 
				+\medskip
			
 
				+
			
 
				+{
			
 
				+\small
			
 
				+
			
 
				+[1] Alexander, J.A.\ \& Mozer, M.C.\ (1995) Template-based algorithms for
			
 
				+connectionist rule extraction. In G.\ Tesauro, D.S.\ Touretzky and T.K.\ Leen
			
 
				+(eds.), {\it Advances in Neural Information Processing Systems 7},
			
 
				+pp.\ 609--616. Cambridge, MA: MIT Press.
			
 
				+
			
 
				+[2] Bower, J.M.\ \& Beeman, D.\ (1995) {\it The Book of GENESIS: Exploring
			
 
				+  Realistic Neural Models with the GEneral NEural SImulation System.}  New York:
			
 
				+TELOS/Springer--Verlag.
			
 
				+
			
 
				+[3] Hasselmo, M.E., Schnell, E.\ \& Barkai, E.\ (1995) Dynamics of learning and
			
 
				+recall at excitatory recurrent synapses and cholinergic modulation in rat
			
 
				+hippocampal region CA3. {\it Journal of Neuroscience} {\bf 15}(7):5249-5262.
			
 
				+}
			
 
				+
			
 
				+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
			
 
				+\section*{Checklist}
			
 
				+
			
 
				+%%% BEGIN INSTRUCTIONS %%%
			
 
				+The checklist follows the references.  Please
			
 
				+read the checklist guidelines carefully for information on how to answer these
			
 
				+questions.  For each question, change the default \answerTODO{} to \answerYes{},
			
 
				+\answerNo{}, or \answerNA{}.  You are strongly encouraged to include a {\bf
			
 
				+justification to your answer}, either by referencing the appropriate section of
			
 
				+your paper or providing a brief inline description.  For example:
			
 
				+\begin{itemize}
			
 
				+  \item Did you include the license to the code and datasets? \answerYes{See Section~\ref{gen_inst}.}
			
 
				+  \item Did you include the license to the code and datasets? \answerNo{The code and the data are proprietary.}
			
 
				+  \item Did you include the license to the code and datasets? \answerNA{}
			
 
				+\end{itemize}
			
 
				+Please do not modify the questions and only use the provided macros for your
			
 
				+answers.  Note that the Checklist section does not count towards the page
			
 
				+limit.  In your paper, please delete this instructions block and only keep the
			
 
				+Checklist section heading above along with the questions/answers below.
			
 
				+%%% END INSTRUCTIONS %%%
			
 
				+
			
 
				+\begin{enumerate}
			
 
				+
			
 
				+\item For all authors...
			
 
				+\begin{enumerate}
			
 
				+  \item Do the main claims made in the abstract and introduction accurately reflect the paper's contributions and scope?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you describe the limitations of your work?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you discuss any potential negative societal impacts of your work?
			
 
				+    \answerTODO{}
			
 
				+  \item Have you read the ethics review guidelines and ensured that your paper conforms to them?
			
 
				+    \answerTODO{}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+\item If you are including theoretical results...
			
 
				+\begin{enumerate}
			
 
				+  \item Did you state the full set of assumptions of all theoretical results?
			
 
				+    \answerTODO{}
			
 
				+	\item Did you include complete proofs of all theoretical results?
			
 
				+    \answerTODO{}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+\item If you ran experiments...
			
 
				+\begin{enumerate}
			
 
				+  \item Did you include the code, data, and instructions needed to reproduce the main experimental results (either in the supplemental material or as a URL)?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you specify all the training details (e.g., data splits, hyperparameters, how they were chosen)?
			
 
				+    \answerTODO{}
			
 
				+	\item Did you report error bars (e.g., with respect to the random seed after running experiments multiple times)?
			
 
				+    \answerTODO{}
			
 
				+	\item Did you include the total amount of compute and the type of resources used (e.g., type of GPUs, internal cluster, or cloud provider)?
			
 
				+    \answerTODO{}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+\item If you are using existing assets (e.g., code, data, models) or curating/releasing new assets...
			
 
				+\begin{enumerate}
			
 
				+  \item If your work uses existing assets, did you cite the creators?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you mention the license of the assets?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you include any new assets either in the supplemental material or as a URL?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you discuss whether and how consent was obtained from people whose data you're using/curating?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you discuss whether the data you are using/curating contains personally identifiable information or offensive content?
			
 
				+    \answerTODO{}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+\item If you used crowdsourcing or conducted research with human subjects...
			
 
				+\begin{enumerate}
			
 
				+  \item Did you include the full text of instructions given to participants and screenshots, if applicable?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you describe any potential participant risks, with links to Institutional Review Board (IRB) approvals, if applicable?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you include the estimated hourly wage paid to participants and the total amount spent on participant compensation?
			
 
				+    \answerTODO{}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+\end{enumerate}
			
 
				+
			
 
				+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
			
 
				+
			
 
				+\appendix
			
 
				+
			
 
				+\section{Appendix}
			
 
				+
			
 
				+Optionally include extra information (complete proofs, additional experiments and plots) in the appendix.
			
 
				+This section will often be part of the supplemental material.
			
 
				+
			
 
				+\end{document}
			
--- a/doc/projectregistration2022/projectregistration.tex
+++ b/doc/projectregistration2022/projectregistration.tex
@@ -0,0 +1,58 @@
 
				+\documentclass{article}
			
 
				+
			
 
				+% if you need to pass options to natbib, use, e.g.:
			
 
				+%     \PassOptionsToPackage{numbers, compress}{natbib}
			
 
				+% before loading neurips_2021
			
 
				+
			
 
				+% preprint (non-anonymized) version; drop the option for submission
			
 
				+\usepackage[preprint]{neurips_2021}
			
 
				+
			
 
				+% to compile a preprint version, e.g., for submission to arXiv, add the
			
 
				+% [preprint] option:
			
 
				+%     \usepackage[preprint]{neurips_2021}
			
 
				+
			
 
				+% to compile a camera-ready version, add the [final] option, e.g.:
			
 
				+%     \usepackage[final]{neurips_2021}
			
 
				+
			
 
				+% to avoid loading the natbib package, add option nonatbib:
			
 
				+%    \usepackage[nonatbib]{neurips_2021}
			
 
				+
			
 
				+\usepackage[utf8]{inputenc} % allow utf-8 input
			
 
				+\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
			
 
				+\usepackage[colorlinks=true]{hyperref}       % hyperlinks
			
 
				+\usepackage{url}            % simple URL typesetting
			
 
				+\usepackage{booktabs}       % professional-quality tables
			
 
				+\usepackage{amsfonts}       % blackboard math symbols
			
 
				+\usepackage{nicefrac}       % compact symbols for 1/2, etc.
			
 
				+\usepackage{microtype}      % microtypography
			
 
				+\usepackage{xcolor}         % colors
			
 
				+
			
 
				+\title{Analyzing Gender Share\\in Casting Actors}
			
 
				+
			
 
				+% The \author macro works with any number of authors. There are two commands
			
 
				+% used to separate the names and addresses of multiple authors: \And and \AND.
			
 
				+%
			
 
				+% Using \And between authors leaves it to LaTeX to determine where to break the
			
 
				+% lines. Using \AND forces a line break at that point. So, if LaTeX puts 3 of 4
			
 
				+% authors names on the first line, and the last on the second line, try using
			
 
				+% \AND instead of \And before the third author name.
			
 
				+
			
 
				+\author{%
			
 
				+  Sophia Herrmann\\
			
 
				+  Matrikelnummer 5688690\\
			
 
				+  \texttt{so.herrmann@student.uni-tuebingen.de} \\
			
 
				+  \And
			
 
				+  Tobias Stumpp\\
			
 
				+  Matrikelnummer 3798377\\
			
 
				+  \texttt{tobias.stumpp@student.uni-tuebingen.de} \\
			
 
				+}
			
 
				+
			
 
				+\begin{document}
			
 
				+
			
 
				+\maketitle
			
 
				+
			
 
				+\begin{abstract}
			
 
				+  We plan to use the \href{https://datasets.imdbws.com/name.basics.tsv.gz}{film-actors}, \href{https://datasets.imdbws.com/title.basics.tsv.gz}{film-titles}, and \href{https://datasets.imdbws.com/title.ratings.tsv.gz}{film-ratings} datasets from \href{https://imdb.com}{IMDb} to examine how the female share of the cast of actors has changed over the years. We want to look at when and in which genres the gender share has changed. We also want to see whether film ratings and genres correlate with gender share and, if applicable, how well film ratings can be predicted.
			
 
				+\end{abstract}
			
 
				+
			
 
				+\end{document}
			
--- a/doc/projectsubmission2022/bibliography.bib
+++ b/doc/projectsubmission2022/bibliography.bib
@@ -0,0 +1,57 @@
 
				+@online{gitrepo,
			
 
				+  title   = "{Code-Repository - Gender-Share-in-Casting-Actors\_DL-WS2122\_public}",
			
 
				+  url     = "{https://coreco.samstagskind.de/tobi/Gender-Share-in-Casting-Actors_DL-WS2122}",
			
 
				+  urldate = "{2022-02-07}",
			
 
				+  data    = "{2022-02-07}"
			
 
				+}
			
 
				+
			
 
				+@online{bechdeltestgoogletrends,
			
 
				+  title   = "{Bechdel test - Explore - Google Trends}",
			
 
				+  url     = "{https://trends.google.com/trends/explore?hl=en&date=all&q=%2Fm%2F0kfxr6x}",
			
 
				+  urldate = "{2022-02-06}",
			
 
				+  data    = "{2022-01-30}"
			
 
				+}
			
 
				+
			
 
				+@online{imdbiface,
			
 
				+  title   = "{IMDb Datasets}",
			
 
				+  url     = "{https://www.imdb.com/interfaces/}",
			
 
				+  urldate = "{2022-01-30}",
			
 
				+  data    = "{2022-01-30}"
			
 
				+}
			
 
				+
			
 
				+@online{imdbws,
			
 
				+  title   = "{IMDb data files available for download}",
			
 
				+  url     = "{https://datasets.imdbws.com/}",
			
 
				+  urldate = "{2022-01-30}",
			
 
				+  data    = "{2022-01-30}"
			
 
				+}
			
 
				+
			
 
				+@online{moviepilotfilmebechtelmehrkohle,
			
 
				+  title   = "{Filme, die den Bechdel-Test bestehen, bringen mehr Kohle}",
			
 
				+  url     = "{https://www.moviepilot.de/news/filme-die-den-bechdel-test-bestehen-bringen-mehr-kohle-128899}",
			
 
				+  urldate = "{2022-02-06}",
			
 
				+  data    = "{2014-04-03}"
			
 
				+}
			
 
				+https://www.moviepilot.de/news/filme-die-den-bechdel-test-bestehen-bringen-mehr-kohle-128899
			
 
				+
			
 
				+@online{fivethirtyeightexclusionwomen,
			
 
				+  title   = "{The Dollar-And-Cents Case Against Hollywood’s Exclusion of Women | FiveThirtyEight}",
			
 
				+  url     = "{https://fivethirtyeight.com/features/the-dollar-and-cents-case-against-hollywoods-exclusion-of-women/}",
			
 
				+  urldate = "{2022-02-06}",
			
 
				+  data    = "{2014-04-01}"
			
 
				+}
			
 
				+
			
 
				+@online{bechdeltestwikien,
			
 
				+  title   = "{Bechdel test - Wikipedia}",
			
 
				+  url     = "{https://en.wikipedia.org/wiki/Bechdel_test}",
			
 
				+  urldate = "{2022-02-06}",
			
 
				+  data    = "{2022-01-02}"
			
 
				+}
			
 
				+
			
 
				+@online{dtwofblog,
			
 
				+  title   = "{DTWOF: The Blog: The Rule}",
			
 
				+  url     = "{https://alisonbechdel.blogspot.com/2005/08/rule.html}",
			
 
				+  urldate = "{2022-02-06}",
			
 
				+  data    = "{2005-08-16}",
			
 
				+  author  = "{Alison Bechdel}"
			
 
				+}
			
--- a/doc/projectsubmission2022/fig-001_Share-in-principal-cast-of-actresses-in-all-movies-1980-2020.png
+++ b/doc/projectsubmission2022/fig-001_Share-in-principal-cast-of-actresses-in-all-movies-1980-2020.png
--- a/doc/projectsubmission2022/neurips_2021.sty
+++ b/doc/projectsubmission2022/neurips_2021.sty
@@ -0,0 +1,377 @@
 
				+% partial rewrite of the LaTeX2e package for submissions to the
			
 
				+% Conference on Neural Information Processing Systems (NeurIPS):
			
 
				+%
			
 
				+% - uses more LaTeX conventions
			
 
				+% - line numbers at submission time replaced with aligned numbers from
			
 
				+%   lineno package
			
 
				+% - \nipsfinalcopy replaced with [final] package option
			
 
				+% - automatically loads times package for authors
			
 
				+% - loads natbib automatically; this can be suppressed with the
			
 
				+%   [nonatbib] package option
			
 
				+% - adds foot line to first page identifying the conference
			
 
				+% - adds preprint option for submission to e.g. arXiv
			
 
				+% - conference acronym modified
			
 
				+%
			
 
				+% Roman Garnett (garnett@wustl.edu) and the many authors of
			
 
				+% nips15submit_e.sty, including MK and drstrip@sandia
			
 
				+%
			
 
				+% last revision: March 2021
			
 
				+
			
 
				+\NeedsTeXFormat{LaTeX2e}
			
 
				+\ProvidesPackage{neurips_2021}[2021/03/31 NeurIPS 2021 submission/camera-ready style file]
			
 
				+
			
 
				+% declare final option, which creates camera-ready copy
			
 
				+\newif\if@neuripsfinal\@neuripsfinalfalse
			
 
				+\DeclareOption{final}{
			
 
				+  \@neuripsfinaltrue
			
 
				+}
			
 
				+
			
 
				+% declare nonatbib option, which does not load natbib in case of
			
 
				+% package clash (users can pass options to natbib via
			
 
				+% \PassOptionsToPackage)
			
 
				+\newif\if@natbib\@natbibtrue
			
 
				+\DeclareOption{nonatbib}{
			
 
				+  \@natbibfalse
			
 
				+}
			
 
				+
			
 
				+% declare preprint option, which creates a preprint version ready for
			
 
				+% upload to, e.g., arXiv
			
 
				+\newif\if@preprint\@preprintfalse
			
 
				+\DeclareOption{preprint}{
			
 
				+  \@preprinttrue
			
 
				+}
			
 
				+
			
 
				+\ProcessOptions\relax
			
 
				+
			
 
				+% determine whether this is an anonymized submission
			
 
				+\newif\if@submission\@submissiontrue
			
 
				+\if@neuripsfinal\@submissionfalse\fi
			
 
				+\if@preprint\@submissionfalse\fi
			
 
				+
			
 
				+% fonts
			
 
				+\renewcommand{\rmdefault}{ptm}
			
 
				+\renewcommand{\sfdefault}{phv}
			
 
				+
			
 
				+% change this every year for notice string at bottom
			
 
				+\newcommand{\@neuripsordinal}{35th}
			
 
				+\newcommand{\@neuripsyear}{2021}
			
 
				+\newcommand{\@neuripslocation}{virtual}
			
 
				+
			
 
				+% acknowledgments
			
 
				+\usepackage{environ}
			
 
				+\newcommand{\acksection}{\section*{Acknowledgments and Disclosure of Funding}}
			
 
				+\NewEnviron{ack}{%
			
 
				+  \acksection
			
 
				+  \BODY
			
 
				+}
			
 
				+
			
 
				+% handle tweaks for camera-ready copy vs. submission copy
			
 
				+\if@preprint
			
 
				+  \newcommand{\@noticestring}{%
			
 
				+    Project Report for \emph{Data Literacy} 2021/22
			
 
				+  }
			
 
				+\else
			
 
				+  \if@neuripsfinal
			
 
				+    \newcommand{\@noticestring}{%
			
 
				+      \@neuripsordinal\/ Conference on Neural Information Processing Systems
			
 
				+      (NeurIPS \@neuripsyear).%, \@neuripslocation.%
			
 
				+    }
			
 
				+  \else
			
 
				+    \newcommand{\@noticestring}{%
			
 
				+      Submitted to \@neuripsordinal\/ Conference on Neural Information
			
 
				+      Processing Systems (NeurIPS \@neuripsyear). Do not distribute.%
			
 
				+    }
			
 
				+
			
 
				+    % hide the acknowledgements
			
 
				+    \NewEnviron{hide}{}
			
 
				+    \let\ack\hide
			
 
				+    \let\endack\endhide
			
 
				+
			
 
				+    % line numbers for submission
			
 
				+    \RequirePackage{lineno}
			
 
				+    \linenumbers
			
 
				+
			
 
				+    % fix incompatibilities between lineno and amsmath, if required, by
			
 
				+    % transparently wrapping linenomath environments around amsmath
			
 
				+    % environments
			
 
				+    \AtBeginDocument{%
			
 
				+      \@ifpackageloaded{amsmath}{%
			
 
				+        \newcommand*\patchAmsMathEnvironmentForLineno[1]{%
			
 
				+          \expandafter\let\csname old#1\expandafter\endcsname\csname #1\endcsname
			
 
				+          \expandafter\let\csname oldend#1\expandafter\endcsname\csname end#1\endcsname
			
 
				+          \renewenvironment{#1}%
			
 
				+                           {\linenomath\csname old#1\endcsname}%
			
 
				+                           {\csname oldend#1\endcsname\endlinenomath}%
			
 
				+        }%
			
 
				+        \newcommand*\patchBothAmsMathEnvironmentsForLineno[1]{%
			
 
				+          \patchAmsMathEnvironmentForLineno{#1}%
			
 
				+          \patchAmsMathEnvironmentForLineno{#1*}%
			
 
				+        }%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{equation}%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{align}%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{flalign}%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{alignat}%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{gather}%
			
 
				+        \patchBothAmsMathEnvironmentsForLineno{multline}%
			
 
				+      }{}
			
 
				+    }
			
 
				+  \fi
			
 
				+\fi
			
 
				+
			
 
				+% load natbib unless told otherwise
			
 
				+\if@natbib
			
 
				+  \RequirePackage{natbib}
			
 
				+\fi
			
 
				+
			
 
				+% set page geometry
			
 
				+\usepackage[verbose=true,letterpaper]{geometry}
			
 
				+\AtBeginDocument{
			
 
				+  \newgeometry{
			
 
				+    textheight=9in,
			
 
				+    textwidth=5.5in,
			
 
				+    top=1in,
			
 
				+    headheight=12pt,
			
 
				+    headsep=25pt,
			
 
				+    footskip=30pt
			
 
				+  }
			
 
				+  \@ifpackageloaded{fullpage}
			
 
				+    {\PackageWarning{neurips_2021}{fullpage package not allowed! Overwriting formatting.}}
			
 
				+    {}
			
 
				+}
			
 
				+
			
 
				+\widowpenalty=10000
			
 
				+\clubpenalty=10000
			
 
				+\flushbottom
			
 
				+\sloppy
			
 
				+
			
 
				+% font sizes with reduced leading
			
 
				+\renewcommand{\normalsize}{%
			
 
				+  \@setfontsize\normalsize\@xpt\@xipt
			
 
				+  \abovedisplayskip      7\p@ \@plus 2\p@ \@minus 5\p@
			
 
				+  \abovedisplayshortskip \z@ \@plus 3\p@
			
 
				+  \belowdisplayskip      \abovedisplayskip
			
 
				+  \belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@
			
 
				+}
			
 
				+\normalsize
			
 
				+\renewcommand{\small}{%
			
 
				+  \@setfontsize\small\@ixpt\@xpt
			
 
				+  \abovedisplayskip      6\p@ \@plus 1.5\p@ \@minus 4\p@
			
 
				+  \abovedisplayshortskip \z@  \@plus 2\p@
			
 
				+  \belowdisplayskip      \abovedisplayskip
			
 
				+  \belowdisplayshortskip 3\p@ \@plus 2\p@   \@minus 2\p@
			
 
				+}
			
 
				+\renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt}
			
 
				+\renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt}
			
 
				+\renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt}
			
 
				+\renewcommand{\large}{\@setfontsize\large\@xiipt{14}}
			
 
				+\renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}}
			
 
				+\renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}}
			
 
				+\renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}}
			
 
				+\renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}}
			
 
				+
			
 
				+% sections with less space
			
 
				+\providecommand{\section}{}
			
 
				+\renewcommand{\section}{%
			
 
				+  \@startsection{section}{1}{\z@}%
			
 
				+                {-2.0ex \@plus -0.5ex \@minus -0.2ex}%
			
 
				+                { 1.5ex \@plus  0.3ex \@minus  0.2ex}%
			
 
				+                {\large\bf\raggedright}%
			
 
				+}
			
 
				+\providecommand{\subsection}{}
			
 
				+\renewcommand{\subsection}{%
			
 
				+  \@startsection{subsection}{2}{\z@}%
			
 
				+                {-1.8ex \@plus -0.5ex \@minus -0.2ex}%
			
 
				+                { 0.8ex \@plus  0.2ex}%
			
 
				+                {\normalsize\bf\raggedright}%
			
 
				+}
			
 
				+\providecommand{\subsubsection}{}
			
 
				+\renewcommand{\subsubsection}{%
			
 
				+  \@startsection{subsubsection}{3}{\z@}%
			
 
				+                {-1.5ex \@plus -0.5ex \@minus -0.2ex}%
			
 
				+                { 0.5ex \@plus  0.2ex}%
			
 
				+                {\normalsize\bf\raggedright}%
			
 
				+}
			
 
				+\providecommand{\paragraph}{}
			
 
				+\renewcommand{\paragraph}{%
			
 
				+  \@startsection{paragraph}{4}{\z@}%
			
 
				+                {1.5ex \@plus 0.5ex \@minus 0.2ex}%
			
 
				+                {-1em}%
			
 
				+                {\normalsize\bf}%
			
 
				+}
			
 
				+\providecommand{\subparagraph}{}
			
 
				+\renewcommand{\subparagraph}{%
			
 
				+  \@startsection{subparagraph}{5}{\z@}%
			
 
				+                {1.5ex \@plus 0.5ex \@minus 0.2ex}%
			
 
				+                {-1em}%
			
 
				+                {\normalsize\bf}%
			
 
				+}
			
 
				+\providecommand{\subsubsubsection}{}
			
 
				+\renewcommand{\subsubsubsection}{%
			
 
				+  \vskip5pt{\noindent\normalsize\rm\raggedright}%
			
 
				+}
			
 
				+
			
 
				+% float placement
			
 
				+\renewcommand{\topfraction      }{0.85}
			
 
				+\renewcommand{\bottomfraction   }{0.4}
			
 
				+\renewcommand{\textfraction     }{0.1}
			
 
				+\renewcommand{\floatpagefraction}{0.7}
			
 
				+
			
 
				+\newlength{\@neuripsabovecaptionskip}\setlength{\@neuripsabovecaptionskip}{7\p@}
			
 
				+\newlength{\@neuripsbelowcaptionskip}\setlength{\@neuripsbelowcaptionskip}{\z@}
			
 
				+
			
 
				+\setlength{\abovecaptionskip}{\@neuripsabovecaptionskip}
			
 
				+\setlength{\belowcaptionskip}{\@neuripsbelowcaptionskip}
			
 
				+
			
 
				+% swap above/belowcaptionskip lengths for tables
			
 
				+\renewenvironment{table}
			
 
				+  {\setlength{\abovecaptionskip}{\@neuripsbelowcaptionskip}%
			
 
				+   \setlength{\belowcaptionskip}{\@neuripsabovecaptionskip}%
			
 
				+   \@float{table}}
			
 
				+  {\end@float}
			
 
				+
			
 
				+% footnote formatting
			
 
				+\setlength{\footnotesep }{6.65\p@}
			
 
				+\setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@}
			
 
				+\renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@}
			
 
				+\setcounter{footnote}{0}
			
 
				+
			
 
				+% paragraph formatting
			
 
				+\setlength{\parindent}{\z@}
			
 
				+\setlength{\parskip  }{5.5\p@}
			
 
				+
			
 
				+% list formatting
			
 
				+\setlength{\topsep       }{4\p@ \@plus 1\p@   \@minus 2\p@}
			
 
				+\setlength{\partopsep    }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@}
			
 
				+\setlength{\itemsep      }{2\p@ \@plus 1\p@   \@minus 0.5\p@}
			
 
				+\setlength{\parsep       }{2\p@ \@plus 1\p@   \@minus 0.5\p@}
			
 
				+\setlength{\leftmargin   }{3pc}
			
 
				+\setlength{\leftmargini  }{\leftmargin}
			
 
				+\setlength{\leftmarginii }{2em}
			
 
				+\setlength{\leftmarginiii}{1.5em}
			
 
				+\setlength{\leftmarginiv }{1.0em}
			
 
				+\setlength{\leftmarginv  }{0.5em}
			
 
				+\def\@listi  {\leftmargin\leftmargini}
			
 
				+\def\@listii {\leftmargin\leftmarginii
			
 
				+              \labelwidth\leftmarginii
			
 
				+              \advance\labelwidth-\labelsep
			
 
				+              \topsep  2\p@ \@plus 1\p@    \@minus 0.5\p@
			
 
				+              \parsep  1\p@ \@plus 0.5\p@ \@minus 0.5\p@
			
 
				+              \itemsep \parsep}
			
 
				+\def\@listiii{\leftmargin\leftmarginiii
			
 
				+              \labelwidth\leftmarginiii
			
 
				+              \advance\labelwidth-\labelsep
			
 
				+              \topsep    1\p@ \@plus 0.5\p@ \@minus 0.5\p@
			
 
				+              \parsep    \z@
			
 
				+              \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@
			
 
				+              \itemsep \topsep}
			
 
				+\def\@listiv {\leftmargin\leftmarginiv
			
 
				+              \labelwidth\leftmarginiv
			
 
				+              \advance\labelwidth-\labelsep}
			
 
				+\def\@listv  {\leftmargin\leftmarginv
			
 
				+              \labelwidth\leftmarginv
			
 
				+              \advance\labelwidth-\labelsep}
			
 
				+\def\@listvi {\leftmargin\leftmarginvi
			
 
				+              \labelwidth\leftmarginvi
			
 
				+              \advance\labelwidth-\labelsep}
			
 
				+
			
 
				+% create title
			
 
				+\providecommand{\maketitle}{}
			
 
				+\renewcommand{\maketitle}{%
			
 
				+  \par
			
 
				+  \begingroup
			
 
				+    \renewcommand{\thefootnote}{\fnsymbol{footnote}}
			
 
				+    % for perfect author name centering
			
 
				+    \renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}}
			
 
				+    % The footnote-mark was overlapping the footnote-text,
			
 
				+    % added the following to fix this problem               (MK)
			
 
				+    \long\def\@makefntext##1{%
			
 
				+      \parindent 1em\noindent
			
 
				+      \hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1
			
 
				+    }
			
 
				+    \thispagestyle{empty}
			
 
				+    \@maketitle
			
 
				+    \@thanks
			
 
				+    \@notice
			
 
				+  \endgroup
			
 
				+  \let\maketitle\relax
			
 
				+  \let\thanks\relax
			
 
				+}
			
 
				+
			
 
				+% rules for title box at top of first page
			
 
				+\newcommand{\@toptitlebar}{
			
 
				+  \hrule height 4\p@
			
 
				+  \vskip 0.25in
			
 
				+  \vskip -\parskip%
			
 
				+}
			
 
				+\newcommand{\@bottomtitlebar}{
			
 
				+  \vskip 0.29in
			
 
				+  \vskip -\parskip
			
 
				+  \hrule height 1\p@
			
 
				+  \vskip 0.09in%
			
 
				+}
			
 
				+
			
 
				+% create title (includes both anonymized and non-anonymized versions)
			
 
				+\providecommand{\@maketitle}{}
			
 
				+\renewcommand{\@maketitle}{%
			
 
				+  \vbox{%
			
 
				+    \hsize\textwidth
			
 
				+    \linewidth\hsize
			
 
				+    \vskip 0.1in
			
 
				+    \@toptitlebar
			
 
				+    \centering
			
 
				+    {\LARGE\bf \@title\par}
			
 
				+    \@bottomtitlebar
			
 
				+    \if@submission
			
 
				+      \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}
			
 
				+        Anonymous Author(s) \\
			
 
				+        Affiliation \\
			
 
				+        Address \\
			
 
				+        \texttt{email} \\
			
 
				+      \end{tabular}%
			
 
				+    \else
			
 
				+      \def\And{%
			
 
				+        \end{tabular}\hfil\linebreak[0]\hfil%
			
 
				+        \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
			
 
				+      }
			
 
				+      \def\AND{%
			
 
				+        \end{tabular}\hfil\linebreak[4]\hfil%
			
 
				+        \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
			
 
				+      }
			
 
				+      \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}%
			
 
				+    \fi
			
 
				+    \vskip 0.3in \@minus 0.1in
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+% add conference notice to bottom of first page
			
 
				+\newcommand{\ftype@noticebox}{8}
			
 
				+\newcommand{\@notice}{%
			
 
				+  % give a bit of extra room back to authors on first page
			
 
				+  \enlargethispage{2\baselineskip}%
			
 
				+  \@float{noticebox}[b]%
			
 
				+    \footnotesize\@noticestring%
			
 
				+  \end@float%
			
 
				+}
			
 
				+
			
 
				+% abstract styling
			
 
				+\renewenvironment{abstract}%
			
 
				+{%
			
 
				+  \vskip 0.075in%
			
 
				+  \centerline%
			
 
				+  {\large\bf Abstract}%
			
 
				+  \vspace{0.5ex}%
			
 
				+  \begin{quote}%
			
 
				+}
			
 
				+{
			
 
				+  \par%
			
 
				+  \end{quote}%
			
 
				+  \vskip 1ex%
			
 
				+}
			
 
				+
			
 
				+% For the paper checklist
			
 
				+\newcommand{\answerYes}[1][]{\textcolor{blue}{[Yes] #1}}
			
 
				+\newcommand{\answerNo}[1][]{\textcolor{orange}{[No] #1}}
			
 
				+\newcommand{\answerNA}[1][]{\textcolor{gray}{[N/A] #1}}
			
 
				+\newcommand{\answerTODO}[1][]{\textcolor{red}{\bf [TODO]}}
			
 
				+
			
 
				+\endinput
			
--- a/doc/projectsubmission2022/neurips_2021.tex
+++ b/doc/projectsubmission2022/neurips_2021.tex
@@ -0,0 +1,491 @@
 
				+\documentclass{article}
			
 
				+
			
 
				+% if you need to pass options to natbib, use, e.g.:
			
 
				+%     \PassOptionsToPackage{numbers, compress}{natbib}
			
 
				+% before loading neurips_2021
			
 
				+
			
 
				+% ready for submission
			
 
				+\usepackage[preprint]{neurips_2021}
			
 
				+
			
 
				+% to compile a preprint version, e.g., for submission to arXiv, add the
			
 
				+% [preprint] option:
			
 
				+%     \usepackage[preprint]{neurips_2021}
			
 
				+
			
 
				+% to compile a camera-ready version, add the [final] option, e.g.:
			
 
				+%     \usepackage[final]{neurips_2021}
			
 
				+
			
 
				+% to avoid loading the natbib package, add option nonatbib:
			
 
				+%    \usepackage[nonatbib]{neurips_2021}
			
 
				+
			
 
				+\usepackage[utf8]{inputenc} % allow utf-8 input
			
 
				+\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
			
 
				+\usepackage{hyperref}       % hyperlinks
			
 
				+\usepackage{url}            % simple URL typesetting
			
 
				+\usepackage{booktabs}       % professional-quality tables
			
 
				+\usepackage{amsfonts}       % blackboard math symbols
			
 
				+\usepackage{nicefrac}       % compact symbols for 1/2, etc.
			
 
				+\usepackage{microtype}      % microtypography
			
 
				+\usepackage{xcolor}         % colors
			
 
				+
			
 
				+\title{Formatting Instructions For NeurIPS 2021}
			
 
				+
			
 
				+% The \author macro works with any number of authors. There are two commands
			
 
				+% used to separate the names and addresses of multiple authors: \And and \AND.
			
 
				+%
			
 
				+% Using \And between authors leaves it to LaTeX to determine where to break the
			
 
				+% lines. Using \AND forces a line break at that point. So, if LaTeX puts 3 of 4
			
 
				+% authors names on the first line, and the last on the second line, try using
			
 
				+% \AND instead of \And before the third author name.
			
 
				+
			
 
				+\author{%
			
 
				+  David S.~Hippocampus\thanks{Use footnote for providing further information
			
 
				+    about author (webpage, alternative address)---\emph{not} for acknowledging
			
 
				+    funding agencies.} \\
			
 
				+  Department of Computer Science\\
			
 
				+  Cranberry-Lemon University\\
			
 
				+  Pittsburgh, PA 15213 \\
			
 
				+  \texttt{hippo@cs.cranberry-lemon.edu} \\
			
 
				+  % examples of more authors
			
 
				+  % \And
			
 
				+  % Coauthor \\
			
 
				+  % Affiliation \\
			
 
				+  % Address \\
			
 
				+  % \texttt{email} \\
			
 
				+  % \AND
			
 
				+  % Coauthor \\
			
 
				+  % Affiliation \\
			
 
				+  % Address \\
			
 
				+  % \texttt{email} \\
			
 
				+  % \And
			
 
				+  % Coauthor \\
			
 
				+  % Affiliation \\
			
 
				+  % Address \\
			
 
				+  % \texttt{email} \\
			
 
				+  % \And
			
 
				+  % Coauthor \\
			
 
				+  % Affiliation \\
			
 
				+  % Address \\
			
 
				+  % \texttt{email} \\
			
 
				+}
			
 
				+
			
 
				+\begin{document}
			
 
				+
			
 
				+\maketitle
			
 
				+
			
 
				+\begin{abstract}
			
 
				+  The abstract paragraph should be indented \nicefrac{1}{2}~inch (3~picas) on
			
 
				+  both the left- and right-hand margins. Use 10~point type, with a vertical
			
 
				+  spacing (leading) of 11~points.  The word \textbf{Abstract} must be centered,
			
 
				+  bold, and in point size 12. Two line spaces precede the abstract. The abstract
			
 
				+  must be limited to one paragraph.
			
 
				+\end{abstract}
			
 
				+
			
 
				+\section{Submission of papers to NeurIPS 2021}
			
 
				+
			
 
				+Please read the instructions below carefully and follow them faithfully.
			
 
				+
			
 
				+\subsection{Style}
			
 
				+
			
 
				+Papers to be submitted to NeurIPS 2021 must be prepared according to the
			
 
				+instructions presented here. Papers may only be up to {\bf nine} pages long,
			
 
				+including figures. Additional pages \emph{containing only acknowledgments and
			
 
				+references} are allowed. Papers that exceed the page limit will not be
			
 
				+reviewed, or in any other way considered for presentation at the conference.
			
 
				+
			
 
				+The margins in 2021 are the same as those in 2007, which allow for $\sim$$15\%$
			
 
				+more words in the paper compared to earlier years.
			
 
				+
			
 
				+Authors are required to use the NeurIPS \LaTeX{} style files obtainable at the
			
 
				+NeurIPS website as indicated below. Please make sure you use the current files
			
 
				+and not previous versions. Tweaking the style files may be grounds for
			
 
				+rejection.
			
 
				+
			
 
				+\subsection{Retrieval of style files}
			
 
				+
			
 
				+The style files for NeurIPS and other conference information are available on
			
 
				+the World Wide Web at
			
 
				+\begin{center}
			
 
				+  \url{http://www.neurips.cc/}
			
 
				+\end{center}
			
 
				+The file \verb+neurips_2021.pdf+ contains these instructions and illustrates the
			
 
				+various formatting requirements your NeurIPS paper must satisfy.
			
 
				+
			
 
				+The only supported style file for NeurIPS 2021 is \verb+neurips_2021.sty+,
			
 
				+rewritten for \LaTeXe{}.  \textbf{Previous style files for \LaTeX{} 2.09,
			
 
				+  Microsoft Word, and RTF are no longer supported!}
			
 
				+
			
 
				+The \LaTeX{} style file contains three optional arguments: \verb+final+, which
			
 
				+creates a camera-ready copy, \verb+preprint+, which creates a preprint for
			
 
				+submission to, e.g., arXiv, and \verb+nonatbib+, which will not load the
			
 
				+\verb+natbib+ package for you in case of package clash.
			
 
				+
			
 
				+\paragraph{Preprint option}
			
 
				+If you wish to post a preprint of your work online, e.g., on arXiv, using the
			
 
				+NeurIPS style, please use the \verb+preprint+ option. This will create a
			
 
				+nonanonymized version of your work with the text ``Preprint. Work in progress.''
			
 
				+in the footer. This version may be distributed as you see fit. Please \textbf{do
			
 
				+  not} use the \verb+final+ option, which should \textbf{only} be used for
			
 
				+papers accepted to NeurIPS.
			
 
				+
			
 
				+At submission time, please omit the \verb+final+ and \verb+preprint+
			
 
				+options. This will anonymize your submission and add line numbers to aid
			
 
				+review. Please do \emph{not} refer to these line numbers in your paper as they
			
 
				+will be removed during generation of camera-ready copies.
			
 
				+
			
 
				+The file \verb+neurips_2021.tex+ may be used as a ``shell'' for writing your
			
 
				+paper. All you have to do is replace the author, title, abstract, and text of
			
 
				+the paper with your own.
			
 
				+
			
 
				+The formatting instructions contained in these style files are summarized in
			
 
				+Sections \ref{gen_inst}, \ref{headings}, and \ref{others} below.
			
 
				+
			
 
				+\section{General formatting instructions}
			
 
				+\label{gen_inst}
			
 
				+
			
 
				+The text must be confined within a rectangle 5.5~inches (33~picas) wide and
			
 
				+9~inches (54~picas) long. The left margin is 1.5~inch (9~picas).  Use 10~point
			
 
				+type with a vertical spacing (leading) of 11~points.  Times New Roman is the
			
 
				+preferred typeface throughout, and will be selected for you by default.
			
 
				+Paragraphs are separated by \nicefrac{1}{2}~line space (5.5 points), with no
			
 
				+indentation.
			
 
				+
			
 
				+The paper title should be 17~point, initial caps/lower case, bold, centered
			
 
				+between two horizontal rules. The top rule should be 4~points thick and the
			
 
				+bottom rule should be 1~point thick. Allow \nicefrac{1}{4}~inch space above and
			
 
				+below the title to rules. All pages should start at 1~inch (6~picas) from the
			
 
				+top of the page.
			
 
				+
			
 
				+For the final version, authors' names are set in boldface, and each name is
			
 
				+centered above the corresponding address. The lead author's name is to be listed
			
 
				+first (left-most), and the co-authors' names (if different address) are set to
			
 
				+follow. If there is only one co-author, list both author and co-author side by
			
 
				+side.
			
 
				+
			
 
				+Please pay special attention to the instructions in Section \ref{others}
			
 
				+regarding figures, tables, acknowledgments, and references.
			
 
				+
			
 
				+\section{Headings: first level}
			
 
				+\label{headings}
			
 
				+
			
 
				+All headings should be lower case (except for first word and proper nouns),
			
 
				+flush left, and bold.
			
 
				+
			
 
				+First-level headings should be in 12-point type.
			
 
				+
			
 
				+\subsection{Headings: second level}
			
 
				+
			
 
				+Second-level headings should be in 10-point type.
			
 
				+
			
 
				+\subsubsection{Headings: third level}
			
 
				+
			
 
				+Third-level headings should be in 10-point type.
			
 
				+
			
 
				+\paragraph{Paragraphs}
			
 
				+
			
 
				+There is also a \verb+\paragraph+ command available, which sets the heading in
			
 
				+bold, flush left, and inline with the text, with the heading followed by 1\,em
			
 
				+of space.
			
 
				+
			
 
				+\section{Citations, figures, tables, references}
			
 
				+\label{others}
			
 
				+
			
 
				+These instructions apply to everyone.
			
 
				+
			
 
				+\subsection{Citations within the text}
			
 
				+
			
 
				+The \verb+natbib+ package will be loaded for you by default.  Citations may be
			
 
				+author/year or numeric, as long as you maintain internal consistency.  As to the
			
 
				+format of the references themselves, any style is acceptable as long as it is
			
 
				+used consistently.
			
 
				+
			
 
				+The documentation for \verb+natbib+ may be found at
			
 
				+\begin{center}
			
 
				+  \url{http://mirrors.ctan.org/macros/latex/contrib/natbib/natnotes.pdf}
			
 
				+\end{center}
			
 
				+Of note is the command \verb+\citet+, which produces citations appropriate for
			
 
				+use in inline text.  For example,
			
 
				+\begin{verbatim}
			
 
				+   \citet{hasselmo} investigated\dots
			
 
				+\end{verbatim}
			
 
				+produces
			
 
				+\begin{quote}
			
 
				+  Hasselmo, et al.\ (1995) investigated\dots
			
 
				+\end{quote}
			
 
				+
			
 
				+If you wish to load the \verb+natbib+ package with options, you may add the
			
 
				+following before loading the \verb+neurips_2021+ package:
			
 
				+\begin{verbatim}
			
 
				+   \PassOptionsToPackage{options}{natbib}
			
 
				+\end{verbatim}
			
 
				+
			
 
				+If \verb+natbib+ clashes with another package you load, you can add the optional
			
 
				+argument \verb+nonatbib+ when loading the style file:
			
 
				+\begin{verbatim}
			
 
				+   \usepackage[nonatbib]{neurips_2021}
			
 
				+\end{verbatim}
			
 
				+
			
 
				+As submission is double blind, refer to your own published work in the third
			
 
				+person. That is, use ``In the previous work of Jones et al.\ [4],'' not ``In our
			
 
				+previous work [4].'' If you cite your other papers that are not widely available
			
 
				+(e.g., a journal paper under review), use anonymous author names in the
			
 
				+citation, e.g., an author of the form ``A.\ Anonymous.''
			
 
				+
			
 
				+\subsection{Footnotes}
			
 
				+
			
 
				+Footnotes should be used sparingly.  If you do require a footnote, indicate
			
 
				+footnotes with a number\footnote{Sample of the first footnote.} in the
			
 
				+text. Place the footnotes at the bottom of the page on which they appear.
			
 
				+Precede the footnote with a horizontal rule of 2~inches (12~picas).
			
 
				+
			
 
				+Note that footnotes are properly typeset \emph{after} punctuation
			
 
				+marks.\footnote{As in this example.}
			
 
				+
			
 
				+\subsection{Figures}
			
 
				+
			
 
				+\begin{figure}
			
 
				+  \centering
			
 
				+  \fbox{\rule[-.5cm]{0cm}{4cm} \rule[-.5cm]{4cm}{0cm}}
			
 
				+  \caption{Sample figure caption.}
			
 
				+\end{figure}
			
 
				+
			
 
				+All artwork must be neat, clean, and legible. Lines should be dark enough for
			
 
				+purposes of reproduction. The figure number and caption always appear after the
			
 
				+figure. Place one line space before the figure caption and one line space after
			
 
				+the figure. The figure caption should be lower case (except for first word and
			
 
				+proper nouns); figures are numbered consecutively.
			
 
				+
			
 
				+You may use color figures.  However, it is best for the figure captions and the
			
 
				+paper body to be legible if the paper is printed in either black/white or in
			
 
				+color.
			
 
				+
			
 
				+\subsection{Tables}
			
 
				+
			
 
				+All tables must be centered, neat, clean and legible.  The table number and
			
 
				+title always appear before the table.  See Table~\ref{sample-table}.
			
 
				+
			
 
				+Place one line space before the table title, one line space after the
			
 
				+table title, and one line space after the table. The table title must
			
 
				+be lower case (except for first word and proper nouns); tables are
			
 
				+numbered consecutively.
			
 
				+
			
 
				+Note that publication-quality tables \emph{do not contain vertical rules.} We
			
 
				+strongly suggest the use of the \verb+booktabs+ package, which allows for
			
 
				+typesetting high-quality, professional tables:
			
 
				+\begin{center}
			
 
				+  \url{https://www.ctan.org/pkg/booktabs}
			
 
				+\end{center}
			
 
				+This package was used to typeset Table~\ref{sample-table}.
			
 
				+
			
 
				+\begin{table}
			
 
				+  \caption{Sample table title}
			
 
				+  \label{sample-table}
			
 
				+  \centering
			
 
				+  \begin{tabular}{lll}
			
 
				+    \toprule
			
 
				+    \multicolumn{2}{c}{Part}                   \\
			
 
				+    \cmidrule(r){1-2}
			
 
				+    Name     & Description     & Size ($\mu$m) \\
			
 
				+    \midrule
			
 
				+    Dendrite & Input terminal  & $\sim$100     \\
			
 
				+    Axon     & Output terminal & $\sim$10      \\
			
 
				+    Soma     & Cell body       & up to $10^6$  \\
			
 
				+    \bottomrule
			
 
				+  \end{tabular}
			
 
				+\end{table}
			
 
				+
			
 
				+\section{Final instructions}
			
 
				+
			
 
				+Do not change any aspects of the formatting parameters in the style files.  In
			
 
				+particular, do not modify the width or length of the rectangle the text should
			
 
				+fit into, and do not change font sizes (except perhaps in the
			
 
				+\textbf{References} section; see below). Please note that pages should be
			
 
				+numbered.
			
 
				+
			
 
				+\section{Preparing PDF files}
			
 
				+
			
 
				+Please prepare submission files with paper size ``US Letter,'' and not, for
			
 
				+example, ``A4.''
			
 
				+
			
 
				+Fonts were the main cause of problems in the past years. Your PDF file must only
			
 
				+contain Type 1 or Embedded TrueType fonts. Here are a few instructions to
			
 
				+achieve this.
			
 
				+
			
 
				+\begin{itemize}
			
 
				+
			
 
				+\item You should directly generate PDF files using \verb+pdflatex+.
			
 
				+
			
 
				+\item You can check which fonts a PDF file uses.  In Acrobat Reader, select the
			
 
				+  menu Files$>$Document Properties$>$Fonts and select Show All Fonts. You can
			
 
				+  also use the program \verb+pdffonts+ which comes with \verb+xpdf+ and is
			
 
				+  available out-of-the-box on most Linux machines.
			
 
				+
			
 
				+\item The IEEE has recommendations for generating PDF files whose fonts are also
			
 
				+  acceptable for NeurIPS. Please see
			
 
				+  \url{http://www.emfield.org/icuwb2010/downloads/IEEE-PDF-SpecV32.pdf}
			
 
				+
			
 
				+\item \verb+xfig+ "patterned" shapes are implemented with bitmap fonts.  Use
			
 
				+  "solid" shapes instead.
			
 
				+
			
 
				+\item The \verb+\bbold+ package almost always uses bitmap fonts.  You should use
			
 
				+  the equivalent AMS Fonts:
			
 
				+\begin{verbatim}
			
 
				+   \usepackage{amsfonts}
			
 
				+\end{verbatim}
			
 
				+followed by, e.g., \verb+\mathbb{R}+, \verb+\mathbb{N}+, or \verb+\mathbb{C}+
			
 
				+for $\mathbb{R}$, $\mathbb{N}$ or $\mathbb{C}$.  You can also use the following
			
 
				+workaround for reals, natural and complex:
			
 
				+\begin{verbatim}
			
 
				+   \newcommand{\RR}{I\!\!R} %real numbers
			
 
				+   \newcommand{\Nat}{I\!\!N} %natural numbers
			
 
				+   \newcommand{\CC}{I\!\!\!\!C} %complex numbers
			
 
				+\end{verbatim}
			
 
				+Note that \verb+amsfonts+ is automatically loaded by the \verb+amssymb+ package.
			
 
				+
			
 
				+\end{itemize}
			
 
				+
			
 
				+If your file contains type 3 fonts or non embedded TrueType fonts, we will ask
			
 
				+you to fix it.
			
 
				+
			
 
				+\subsection{Margins in \LaTeX{}}
			
 
				+
			
 
				+Most of the margin problems come from figures positioned by hand using
			
 
				+\verb+\special+ or other commands. We suggest using the command
			
 
				+\verb+\includegraphics+ from the \verb+graphicx+ package. Always specify the
			
 
				+figure width as a multiple of the line width as in the example below:
			
 
				+\begin{verbatim}
			
 
				+   \usepackage[pdftex]{graphicx} ...
			
 
				+   \includegraphics[width=0.8\linewidth]{myfile.pdf}
			
 
				+\end{verbatim}
			
 
				+See Section 4.4 in the graphics bundle documentation
			
 
				+(\url{http://mirrors.ctan.org/macros/latex/required/graphics/grfguide.pdf})
			
 
				+
			
 
				+A number of width problems arise when \LaTeX{} cannot properly hyphenate a
			
 
				+line. Please give LaTeX hyphenation hints using the \verb+\-+ command when
			
 
				+necessary.
			
 
				+
			
 
				+\begin{ack}
			
 
				+Use unnumbered first level headings for the acknowledgments. All acknowledgments
			
 
				+go at the end of the paper before the list of references. Moreover, you are required to declare
			
 
				+funding (financial activities supporting the submitted work) and competing interests (related financial activities outside the submitted work).
			
 
				+More information about this disclosure can be found at: \url{https://neurips.cc/Conferences/2021/PaperInformation/FundingDisclosure}.
			
 
				+
			
 
				+Do {\bf not} include this section in the anonymized submission, only in the final paper. You can use the \texttt{ack} environment provided in the style file to automatically hide this section in the anonymized submission.
			
 
				+\end{ack}
			
 
				+
			
 
				+\section*{References}
			
 
				+
			
 
				+References follow the acknowledgments. Use unnumbered first-level heading for
			
 
				+the references. Any choice of citation style is acceptable as long as you are
			
 
				+consistent. It is permissible to reduce the font size to \verb+small+ (9 point)
			
 
				+when listing the references.
			
 
				+Note that the Reference section does not count towards the page limit.
			
 
				+\medskip
			
 
				+
			
 
				+{
			
 
				+\small
			
 
				+
			
 
				+[1] Alexander, J.A.\ \& Mozer, M.C.\ (1995) Template-based algorithms for
			
 
				+connectionist rule extraction. In G.\ Tesauro, D.S.\ Touretzky and T.K.\ Leen
			
 
				+(eds.), {\it Advances in Neural Information Processing Systems 7},
			
 
				+pp.\ 609--616. Cambridge, MA: MIT Press.
			
 
				+
			
 
				+[2] Bower, J.M.\ \& Beeman, D.\ (1995) {\it The Book of GENESIS: Exploring
			
 
				+  Realistic Neural Models with the GEneral NEural SImulation System.}  New York:
			
 
				+TELOS/Springer--Verlag.
			
 
				+
			
 
				+[3] Hasselmo, M.E., Schnell, E.\ \& Barkai, E.\ (1995) Dynamics of learning and
			
 
				+recall at excitatory recurrent synapses and cholinergic modulation in rat
			
 
				+hippocampal region CA3. {\it Journal of Neuroscience} {\bf 15}(7):5249-5262.
			
 
				+}
			
 
				+
			
 
				+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
			
 
				+\section*{Checklist}
			
 
				+
			
 
				+%%% BEGIN INSTRUCTIONS %%%
			
 
				+The checklist follows the references.  Please
			
 
				+read the checklist guidelines carefully for information on how to answer these
			
 
				+questions.  For each question, change the default \answerTODO{} to \answerYes{},
			
 
				+\answerNo{}, or \answerNA{}.  You are strongly encouraged to include a {\bf
			
 
				+justification to your answer}, either by referencing the appropriate section of
			
 
				+your paper or providing a brief inline description.  For example:
			
 
				+\begin{itemize}
			
 
				+  \item Did you include the license to the code and datasets? \answerYes{See Section~\ref{gen_inst}.}
			
 
				+  \item Did you include the license to the code and datasets? \answerNo{The code and the data are proprietary.}
			
 
				+  \item Did you include the license to the code and datasets? \answerNA{}
			
 
				+\end{itemize}
			
 
				+Please do not modify the questions and only use the provided macros for your
			
 
				+answers.  Note that the Checklist section does not count towards the page
			
 
				+limit.  In your paper, please delete this instructions block and only keep the
			
 
				+Checklist section heading above along with the questions/answers below.
			
 
				+%%% END INSTRUCTIONS %%%
			
 
				+
			
 
				+\begin{enumerate}
			
 
				+
			
 
				+\item For all authors...
			
 
				+\begin{enumerate}
			
 
				+  \item Do the main claims made in the abstract and introduction accurately reflect the paper's contributions and scope?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you describe the limitations of your work?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you discuss any potential negative societal impacts of your work?
			
 
				+    \answerTODO{}
			
 
				+  \item Have you read the ethics review guidelines and ensured that your paper conforms to them?
			
 
				+    \answerTODO{}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+\item If you are including theoretical results...
			
 
				+\begin{enumerate}
			
 
				+  \item Did you state the full set of assumptions of all theoretical results?
			
 
				+    \answerTODO{}
			
 
				+	\item Did you include complete proofs of all theoretical results?
			
 
				+    \answerTODO{}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+\item If you ran experiments...
			
 
				+\begin{enumerate}
			
 
				+  \item Did you include the code, data, and instructions needed to reproduce the main experimental results (either in the supplemental material or as a URL)?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you specify all the training details (e.g., data splits, hyperparameters, how they were chosen)?
			
 
				+    \answerTODO{}
			
 
				+	\item Did you report error bars (e.g., with respect to the random seed after running experiments multiple times)?
			
 
				+    \answerTODO{}
			
 
				+	\item Did you include the total amount of compute and the type of resources used (e.g., type of GPUs, internal cluster, or cloud provider)?
			
 
				+    \answerTODO{}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+\item If you are using existing assets (e.g., code, data, models) or curating/releasing new assets...
			
 
				+\begin{enumerate}
			
 
				+  \item If your work uses existing assets, did you cite the creators?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you mention the license of the assets?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you include any new assets either in the supplemental material or as a URL?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you discuss whether and how consent was obtained from people whose data you're using/curating?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you discuss whether the data you are using/curating contains personally identifiable information or offensive content?
			
 
				+    \answerTODO{}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+\item If you used crowdsourcing or conducted research with human subjects...
			
 
				+\begin{enumerate}
			
 
				+  \item Did you include the full text of instructions given to participants and screenshots, if applicable?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you describe any potential participant risks, with links to Institutional Review Board (IRB) approvals, if applicable?
			
 
				+    \answerTODO{}
			
 
				+  \item Did you include the estimated hourly wage paid to participants and the total amount spent on participant compensation?
			
 
				+    \answerTODO{}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+\end{enumerate}
			
 
				+
			
 
				+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
			
 
				+
			
 
				+\appendix
			
 
				+
			
 
				+\section{Appendix}
			
 
				+
			
 
				+Optionally include extra information (complete proofs, additional experiments and plots) in the appendix.
			
 
				+This section will often be part of the supplemental material.
			
 
				+
			
 
				+\end{document}
			
--- a/doc/projectsubmission2022/projectsubmission.tex
+++ b/doc/projectsubmission2022/projectsubmission.tex
@@ -0,0 +1,230 @@
 
				+\documentclass{article}
			
 
				+
			
 
				+% if you need to pass options to natbib, use, e.g.:
			
 
				+%     \PassOptionsToPackage{numbers, compress}{natbib}
			
 
				+% before loading neurips_2021
			
 
				+
			
 
				+\bibliographystyle{unsrtnat}
			
 
				+\PassOptionsToPackage{numbers, compress}{natbib}
			
 
				+% ready for submission
			
 
				+ 
			
 
				+ \usepackage[preprint]{neurips_2021}
			
 
				+%\usepackage[nonatbib,preprint]{neurips_2021}
			
 
				+
			
 
				+% to compile a preprint version, e.g., for submission to arXiv, add the
			
 
				+% [preprint] option:
			
 
				+%     \usepackage[preprint]{neurips_2021}
			
 
				+
			
 
				+% to compile a camera-ready version, add the [final] option, e.g.:
			
 
				+%     \usepackage[final]{neurips_2021}
			
 
				+
			
 
				+% to avoid loading the natbib package, add option nonatbib:
			
 
				+%    \usepackage[nonatbib]{neurips_2021}
			
 
				+
			
 
				+\usepackage[utf8]{inputenc} % allow utf-8 input
			
 
				+\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
			
 
				+\usepackage[colorlinks=true]{hyperref}       % hyperlinks
			
 
				+\usepackage{url}            % simple URL typesetting
			
 
				+\usepackage{booktabs}       % professional-quality tables
			
 
				+\usepackage{amsfonts}       % blackboard math symbols
			
 
				+\usepackage{nicefrac}       % compact symbols for 1/2, etc.
			
 
				+\usepackage{microtype}      % microtypography
			
 
				+\usepackage{xcolor}         % colors
			
 
				+\usepackage{graphicx} %package to manage images
			
 
				+\usepackage[nodayofweek,level]{datetime}
			
 
				+\usepackage{adjustbox}
			
 
				+
			
 
				+\title{Analyzing Gender Share\\in Casting Actors}
			
 
				+
			
 
				+% The \author macro works with any number of authors. There are two commands
			
 
				+% used to separate the names and addresses of multiple authors: \And and \AND.
			
 
				+%
			
 
				+% Using \And between authors leaves it to LaTeX to determine where to break the
			
 
				+% lines. Using \AND forces a line break at that point. So, if LaTeX puts 3 of 4
			
 
				+% authors names on the first line, and the last on the second line, try using
			
 
				+% \AND instead of \And before the third author name.
			
 
				+
			
 
				+\author{%
			
 
				+  Sophia Herrmann\\
			
 
				+  Matrikelnummer 5688690\\
			
 
				+  \texttt{so.herrmann@student.uni-tuebingen.de} \\
			
 
				+  \And
			
 
				+  Tobias Stumpp\\
			
 
				+  Matrikelnummer 3798377\\
			
 
				+  \texttt{tobias.stumpp@student.uni-tuebingen.de} \\
			
 
				+}
			
 
				+
			
 
				+\begin{document}
			
 
				+
			
 
				+\maketitle
			
 
				+
			
 
				+\begin{abstract}
			
 
				+  We use the dataset on \href{https://datasets.imdbws.com/title.principals.tsv.gz}{film-principals}, \href{https://datasets.imdbws.com/title.basics.tsv.gz}{film-titles}, \href{https://datasets.imdbws.com/title.ratings.tsv.gz}{film-ratings} from the \href{https://imdb.com}{IMDb}~\citep{imdbiface,imdbws} to examine how the female share on the cast of principal actors has changed over years. We want to look at when and in which genres the gender share has changed. We want to see if we can find correlations of film ratings and genres on gender share, and, if applicable, see how well film rating can be predicted.
			
 
				+\end{abstract}
			
 
				+
			
 
				+% - Wieso ist Gender Share/unsere Fragestellung von Interesse.
			
 
				+%  - Bechdel-Test-Ersatz
			
 
				+%  - Fragen
			
 
				+%    - "Bechdel-Test hat Schlagzeilen um 2000 gemacht." Hat sich seither etwas verändert?
			
 
				+%    - "Filme, die den Bechdel-Test bestehen wären erfolgreicher." Stimmt das?
			
 
				+%  - Ziel (kurz)
			
 
				+%     - Wir untersuchen "Frage 1" mit, wollen Ergebnis ob..
			
 
				+%     - Wir untersuchen "Frage 2" mit, wollen Ergebnis ob..
			
 
				+% - Welche Daten haben wir
			
 
				+%   - Datenvorstellung IMDb
			
 
				+%   - Übersicht der Features
			
 
				+% - Methoden
			
 
				+%   - Beschreibung
			
 
				+%   - (Vorraussetzungen/Assumptions)
			
 
				+% - Analyse & Ergebnisse 
			
 
				+%   - Datenanalyse
			
 
				+%   - Statistiktests
			
 
				+% - Probleme/Limitations
			
 
				+% - Resümee
			
 
				+
			
 
				+
			
 
				+\section{Impact of Bechdel test on the female share in principal cast}
			
 
				+\label{sect_intro}
			
 
				+
			
 
				+In the context of gender equality, and inspired by the Bechdel test and a possible impact of the test, we aim to examine the gender balance in principal roles in movies by using IMDb data~\citep{imdbiface,imdbws} on movie casting.
			
 
				+
			
 
				+The Bechdel test is an indicator of active female roles in fiction. The basis for the test as understood today goes back to a comic strip from 1985, with criteria that can also be derived from the narrative: A woman explains that she will only go to movies that (1) feature at least two women (2) talking to each other (3) about something other than a man.~\cite{bechdeltestwikien,dtwofblog}
			
 
				+The English Wikipedia page on the Bechdel test mentions two statements that we would like to examine within the scope of our possibilities on data analysis:
			
 
				+
			
 
				+\begin{enumerate}
			
 
				+    \item "the test became more widely discussed in the 2000s"~\citep{bechdeltestwikien,bechdeltestgoogletrends}\\
			
 
				+    We test: Did the proportion of women in principal roles in movies change after the year 2000?
			
 
				+    \item "the films that passed the test had about a 37 percent higher return on investment (ROI)"\\
			
 
				+    We test: Does the proportion of women in principal roles correlate with movie success?~\citep{bechdeltestwikien,fivethirtyeightexclusionwomen}
			
 
				+\end{enumerate}
			
 
				+
			
 
				+We assume that the media attention the Bechdel test received in the 2000s led both to an increase in the popularity of movies with a higher female share in the principal cast and to a trend in the movie industry to cast more actresses in principal roles. Herein we find an incentive for further analysis regarding possible observable patterns in the share of women in the principal cast and the popularity of movies. We therefore interpret 2000 as a critical year for a significant shift.
			
 
				+
			
 
				+In line with these assumptions, we test (1) for significant change of actress share in principal roles with year 2000, and we analyze (2) correlation and predictability between actress share and average rating as measure of popularity with years after 2000.
			
 
				+
			
 
				+% - Welche Daten haben wir 
			
 
				+
			
 
				+\section{Dataset description and preprocessing}
			
 
				+\label{sect_dataset}
			
 
				+We analyze data from the Internet Movie Database (IMDb), which provides a public subset for public research purposes. As an online platform, the IMDb lets users retrieve and file detailed information on movies, television series, video productions, and computer games. The public subset of IMDb api-retrievable-data includes movies from 1890 to the present day. The subset of the IMDb publicly provided data is regenerated daily. We make use of the files and features as shown in table~\ref{feature_table}.
			
 
				+
			
 
				+\begin{table}
			
 
				+  \caption{Files and features in use}
			
 
				+  \label{feature_table}
			
 
				+  \centering
			
 
				+  \begin{adjustbox}{width=\columnwidth,center}
			
 
				+  \begin{tabular}{lllp{12cm}}
			
 
				+    \toprule
			
 
				+    
			
 
				+    File     & Feature & Type & Description \\
			
 
				+    \midrule
			
 
				+    film-principals\footnote{\url{https://datasets.imdbws.com/title.principals.tsv.gz}}
			
 
				+    & tconst     & (string)  & alphanumeric unique identifier of the title \\ \cmidrule(r){2-4}
			
 
				+    & nconst     & (string)  & alphanumeric unique identifier of the name/person \\ \cmidrule(r){2-4}
			
 
				+    & category   & (string)  & the category of job that person was in \\
			
 
				+    
			
 
				+    \hline
			
 
				+
			
 
				+    film-titles\footnote{\url{https://datasets.imdbws.com/title.basics.tsv.gz}}
			
 
				+    & tconst         & (string)       & alphanumeric unique identifier of the title \\ \cmidrule(r){2-4}
			
 
				+    & titleType      & (string)       & the type/format of the title (e.g. movie, short, tvseries, tvepisode, video, etc) \\ \cmidrule(r){2-4}
			
 
				+    & startYear      & (YYYY)         & represents the release year of a title. In the case of TV Series, it is the series start year \\ \cmidrule(r){2-4}
			
 
				+    & runtimeMinutes & (integer)      & primary runtime of the title, in minutes \\ \cmidrule(r){2-4}
			
 
				+    & genres         & (string array) & includes up to three genres associated with the title \\
			
 
				+    
			
 
				+    \hline
			
 
				+    
			
 
				+    film-ratings \footnote{\url{https://datasets.imdbws.com/title.ratings.tsv.gz}}
			
 
				+    & tconst         & (string)  & alphanumeric unique identifier of the title \\ \cmidrule(r){2-4}
			
 
				+    & averageRating  & (integer) & weighted average of all the individual user ratings \\ \cmidrule(r){2-4}
			
 
				+    & numVotes       & (integer) & number of votes the title has received\\
			
 
				+    
			
 
				+    \bottomrule
			
 
				+  \end{tabular}
			
 
				+  \end{adjustbox}
			
 
				+\end{table}
			
 
				+
			
 
				+
			
 
				+\label{sect_preprocessing}
			
 
				+Our download from \formatdate{30}{1}{2022} captures 77,838,777 movies which we preprocess in several steps:
			
 
				+
			
 
				+\begin{itemize}
			
 
				+    \item We consider only movies within the time frame from 1980 to 2020.
			
 
				+    
			
 
				+    \item We drop movies regarding the feature \emph{movie duration}. Some movies show a duration of only a few minutes. On the other extreme, some movies show a duration of over 1000 minutes. To filter likely lower-quality movies out of the dataset, movies with a duration above the 95\% quantile [135 min] or below the 5\% quantile [52 min] are removed and therefore ignored in our analysis.
			
 
				+    
			
 
				+    \item We only keep relevant features: The movie id (tconst), the movie release year (startYear), genres, the movie duration (runtimeMinutes), category (indicating if the movie contains actor(s) and/or actress(es) in the principal cast).
			
 
				+    
			
 
				+    \item We functionally derive dependent data. I.e., we derive the share and proportion of actresses that are in principal cast for each movie. We derive the proportion of the absolute numbers of actresses against actors.
			
 
				+\end{itemize}
			
 
				+
			
 
				+
			
 
				+For the second analysis only the time frame between 2000 and 2020 was considered. Therefore, the data set drops to a size of 880,209 movies. Additionally, the feature genre had to be further preprocessed. Genre covers 951 different entries, where the majority of movies presents genre overlaps such as Drama-Comedy or Drama-Thriller-Horror. Keeping all of those 951 genres as dummy variables is messy. Splitting those overlaps of genres and allowing movies to have several genres would lead to dependencies. Hence, for further analysis only movies were considered that belong to a single genre (number of single genres = 24, new data set size = 43,680). This approach could also reveal that movies that are strictly assigned to one genre differ a lot in their features against other genres.
			
 
				+
			
 
				+
			
 
				+% - Methoden
			
 
				+\section{Methods}
			
 
				+\label{sect_methods}
			
 
				+\begin{figure}
			
 
				+  \centering
			
 
				+  %\fbox{\rule[-.5cm]{0cm}{4cm} \rule[-.5cm]{4cm}{0cm}}
			
 
				+  \includegraphics[width=1\textwidth]{fig-001_Share-in-principal-cast-of-actresses-in-all-movies-1980-2020.png}
			
 
				+  \caption{Share in principal cast of actresses in all movies, 1980 - 2020.}
			
 
				+  \label{actresses_prop_figure}
			
 
				+\end{figure}
			
 
				+
			
 
				+\subsubsection*{Descriptive Analysis}
			
 
				+Firstly, we use figure~\ref{actresses_prop_figure} to receive an overview about range of dispersion of the shares of actresses on principal cast for each single year. Here, the left time frame covers the years from 1980 to 1990 (marked with blue points) and the right time frame covers the years from 2000 to 2020 (marked with orange points). Additionally, for each year the mean value over the shares of actresses on principal cast was computed and marked with green and red points.
			
 
				+Observing differences in the share of actresses on principal cast after 2000 is difficult to evaluate. The figure presents a high variation in the shares in principal cast of actresses, hence the computed means for each year go in line with high standard deviations. Hence, a clear change in pattern in the years after 2000 against the years before 2000 cannot be identified.
			
 
				+However, the mean values appear to be slightly higher after 2000.
			
 
				+To provide more quantitative insights into possible differences in the share of actresses on principal cast, significance tests are implemented.
			
 
				+
			
 
				+\subsubsection*{Statistical analysis}
			
 
				+
			
 
				+With t-testing, our goal is to find out if the mean $\mu_1$ on the proportion of actresses in principal roles from 2000-2020 differs significantly compared to the mean $\mu_0$ on the proportion of actresses in principal roles in 1980-2000.
			
 
				+
			
 
				+With beta-binomial-testing, we put a beta-prior on $f_0$ (the probability to experience an amount of shares) which is based on $m_0$ (the number of a share on movies in 1980-2000) in $n_0$ movies (the number of movies in 1980-2000).\\
			
 
				+Next, under the null hypothesis $H_0: f_1 = f_0$, the number of movies with a share in 2000-2020 $m_1$ (given the number of movies in 2000-2020 $n_1$) follows a binomial distribution.\\
			
 
				+This tells us the probability to observe $m_1$ shares for movies in 2000-2020, given the number of movies in 2000-2020 $n_1$ and the statistics $m_0$, $n_0$ for the years 1980-2000.
			
 
				+
			
 
				+\subsubsection*{Analyzing the relationship of the share of actresses on principal cast and average movie ratings and the suitability of linear regression models for predictive modeling}
			
 
				+
			
 
				+The relationship of the female share on principal cast on the average mean rating between 2000 and 2020 was analyzed by a scatter plot. Further, the linear regression model was implemented to evaluate its suitability as prediction model for the average rating on the share of actresses on the principal cast.
			
 
				+Additionally, the impact of including the features movie duration and genre on the model fit of the linear regression was analyzed. For the latter model, only those movies were considered that cover a single genre. The genres were included as dummy variables, whereby the dummy variable for the genre "drama" was excluded due to multicollinearity.
			
 
				+
			
 
				+%   - Statistiktests und regression
			
 
				+
			
 
				+\section{Results}
			
 
				+\label{sect_results}
			
 
				+
			
 
				+With (1)~\ref{sect_intro} we want to study whether the proportion of principal roles filled by actresses differs between the periods 1980-2000 and 2000-2020. We do not find a clear indication in a visual~analysis~\ref{actresses_prop_figure}, we assume due to high variances and a discrete fashion of available data.
			
 
				+
			
 
				+The statistical tests in a non-visual analysis, more specifically the t-test and the beta-binomial-test, result in insignificant p-values\footnote{\url{https://coreco.samstagskind.de/tobi/Gender-Share-in-Casting-Actors_DL-WS2122_public/src/branch/master/exp/exp-003_T-Test-Hypothesis-Testing.ipynb}}~\citep{gitrepo} except for two occasions on the beta-binomial-test that propose significance: Testing whether there are unlikely\footnote{\url{https://coreco.samstagskind.de/tobi/Gender-Share-in-Casting-Actors_DL-WS2122_public/src/branch/master/exp/exp-004_Beta-Binomial-Hypothesis-Testing.ipynb}}~\citep{gitrepo}\\
			
 
				+\begin{itemize}
			
 
				+    \item more movies with a majority of actresses in the principal roles.
			
 
				+    \item less movies with a minority of actresses in the principal roles.
			
 
				+\end{itemize}
			
 
				+
			
 
				+With (2)~\ref{sect_intro} we do not find a correlation of actress share of principal cast on average rating\footnote{\url{https://coreco.samstagskind.de/tobi/Gender-Share-in-Casting-Actors_DL-WS2122_public/src/branch/master/exp/exp-005_Relationship-Rating-and-Share-Actresses-on-principal-cast.ipynb}}~\citep{gitrepo}.
			
 
				+Firstly, a simple scatter plot of the share of actresses on principal cast against the average rating did not present any pattern. Each value of the actress share covered almost the whole range of possible rating scores. Additionally, the Pearson correlation coefficient was computed and affirmed no meaningful linear relationship by a value of -0.07. Due to those results, the previous idea of using a linear regression model could already be stated as an unsuitable prediction model, not fulfilling model assumptions of linearity. In line with this, the linear regression model showed a poor model fit with an R-squared value of 0.005. Even though the estimated coefficient for the actress share was significant, the aim of receiving accurate predictions for average movie rating on actress share is not given by a linear regression model with a single predictor.
			
 
				+The results of including the movie duration and genre as additional explanatory variables into the linear regression model were again unsatisfactory. The overall model fit was better than in the first model, but still poor with an R-squared of 0.22. Hence, the expectation that controlling for single genres by dummy variables would yield a lower variation in the data within each single genre was not met.
			
 
				+Positively, many dummy variables were significant, which motivates further research into a possible relationship between actress share on principal cast and average rating within single genres.
			
 
				+
			
 
				+% - Probleme/Limitations
			
 
				+
			
 
				+\section{Discussion}
			
 
				+\label{sect_discussion}
			
 
				+ The paper does not detect a clear difference of the share of actresses on principal cast in the years before and after 2000. The significance tests provided contradictory results.
			
 
				+ However, the use of the t-test is to be questioned. The assumption of normally distributed data cannot be well fulfilled due to a more discrete pattern of the actress shares.
			
 
				+ 
			
 
				+ Additionally, our initial goal of predicting the average rating from the share of actresses on principal cast was naive. Both the linear regression model and the small set of predictor variables were unsuitable.
			
 
				+
			
 
				+{
			
 
				+\small
			
 
				+
			
 
				+\bibliography{bibliography}
			
 
				+
			
 
				+}
			
 
				+
			
 
				+\end{document}
			
 
				+
			
--- a/exp/exp-001_Data-Preprocessing-and-Provisioning.ipynb
+++ b/exp/exp-001_Data-Preprocessing-and-Provisioning.ipynb
@@ -0,0 +1,353 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Data Literacy - Project\n",
			
 
				+    "## Gender Share in Movies\n",
			
 
				+    "#### Tobias Stumpp, Sophia Herrmann"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### README & TODO\n",
			
 
				+    "\n",
			
 
				+    "Please run all cells of this IPython notebook once. You may use the button that is revealed by executing the next cell.  \n",
			
 
				+    "Running this notebook prepares and provides the preprocessed data files required by all the experiments in this repository."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 1,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "'Please click this button below to provide the required preprocessed data files for the experiments:'"
			
 
				+      ]
			
 
				+     },
			
 
				+     "metadata": {},
			
 
				+     "output_type": "display_data"
			
 
				+    },
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "application/vnd.jupyter.widget-view+json": {
			
 
				+       "model_id": "6d505788715c424d9776b876af6b6290",
			
 
				+       "version_major": 2,
			
 
				+       "version_minor": 0
			
 
				+      },
			
 
				+      "text/plain": [
			
 
				+       "Button(description='Run all cells below', style=ButtonStyle())"
			
 
				+      ]
			
 
				+     },
			
 
				+     "metadata": {},
			
 
				+     "output_type": "display_data"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from IPython.display import Javascript, display\n",
			
 
				+    "from ipywidgets import widgets\n",
			
 
				+    "\n",
			
 
				+    "def run_all(ev):\n",
			
 
				+    "    Javascript('IPython.Application.instance().kernel.do_shutdown(True)')\n",
			
 
				+    "    display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.ncells())'))\n",
			
 
				+    "\n",
			
 
				+    "display(\"Please click this button below to provide the required preprocessed data files for the experiments:\")\n",
			
 
				+    "button = widgets.Button(description=\"Run all cells below\")\n",
			
 
				+    "button.on_click(run_all)\n",
			
 
				+    "display(button)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 2,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "import numpy as np\n",
			
 
				+    "import pandas as pd\n",
			
 
				+    "import os"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 3,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "path = '../'\n",
			
 
				+    "os.chdir(path)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Extract data archive files"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 4,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "import gzip\n",
			
 
				+    "import shutil\n",
			
 
				+    "\n",
			
 
				+    "files = [\n",
			
 
				+    "    'dat/title.basics.tsv.gz',\n",
			
 
				+    "    'dat/title.principals.tsv.gz',\n",
			
 
				+    "    'dat/title.ratings.tsv.gz',\n",
			
 
				+    "]\n",
			
 
				+    "\n",
			
 
				+    "def unzip(files=files):\n",
			
 
				+    "    for file in files:\n",
			
 
				+    "        if file.endswith('.gz'):\n",
			
 
				+    "            with gzip.open(file, 'rb') as f_in:\n",
			
 
				+    "                with open(file[:-3], 'wb') as f_out:\n",
			
 
				+    "                    shutil.copyfileobj(f_in, f_out)\n",
			
 
				+    "\n",
			
 
				+    "unzip(files)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Read data files"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 5,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_film = pd.read_csv('dat/title.basics.tsv', sep='\\t', na_values=['\\\\N'], dtype={\n",
			
 
				+    "    \"isAdult\": bool,\n",
			
 
				+    "    \"startYear\": float,\n",
			
 
				+    "    \"endYear\": float, \n",
			
 
				+    "    \"runtimeMinutes\": float,\n",
			
 
				+    "    },\n",
			
 
				+    "    # Skip lines that are syntactically incorrect and would therefore cause\n",
			
 
				+    "    # - a column shift within the row\n",
			
 
				+    "    # - assignment errors for column datatypes\n",
			
 
				+    "    skiprows=[\n",
			
 
				+    "        1098292,\n",
			
 
				+    "        1510501,\n",
			
 
				+    "        1900901,\n",
			
 
				+    "        2012237,\n",
			
 
				+    "        2167663,\n",
			
 
				+    "        2313911,\n",
			
 
				+    "        3012068,\n",
			
 
				+    "        5964307,\n",
			
 
				+    "        8605235,\n",
			
 
				+    "        8645208,\n",
			
 
				+    "    ]\n",
			
 
				+    ")\n",
			
 
				+    "# tconst (string) - alphanumeric unique identifier of the title\n",
			
 
				+    "# titleType (string) – the type/format of the title (e.g. movie, short, tvseries, tvepisode, video, etc)\n",
			
 
				+    "# primaryTitle (string) – the more popular title / the title used by the filmmakers on promotional materials at the point of release\n",
			
 
				+    "# originalTitle (string) - original title, in the original language\n",
			
 
				+    "# isAdult (boolean) - 0: non-adult title; 1: adult title\n",
			
 
				+    "# startYear (YYYY) – represents the release year of a title. In the case of TV Series, it is the series start year\n",
			
 
				+    "# endYear (YYYY) – TV Series end year. ‘\\N’ for all other title types\n",
			
 
				+    "# runtimeMinutes – primary runtime of the title, in minutes\n",
			
 
				+    "# genres (string array) – includes up to three genres associated with the title\n",
			
 
				+    "\n",
			
 
				+    "data_rating = data = pd.read_csv('dat/title.ratings.tsv', sep='\\t', na_values=['\\\\N'], dtype={\n",
			
 
				+    "    \"averageRating\": float,\n",
			
 
				+    "    \"numVotes\": float,\n",
			
 
				+    "})\n",
			
 
				+    "# tconst (string) - alphanumeric unique identifier of the title\n",
			
 
				+    "# averageRating – weighted average of all the individual user ratings\n",
			
 
				+    "# numVotes - number of votes the title has received\n",
			
 
				+    "\n",
			
 
				+    "data_principals = pd.read_csv('dat/title.principals.tsv', sep='\\t', na_values=['\\\\N'], dtype={\n",
			
 
				+    "    \"ordering\": float,\n",
			
 
				+    "})\n",
			
 
				+    "# tconst (string) - alphanumeric unique identifier of the title\n",
			
 
				+    "# ordering (integer) – a number to uniquely identify rows for a given titleId\n",
			
 
				+    "# nconst (string) - alphanumeric unique identifier of the name/person\n",
			
 
				+    "# category (string) - the category of job that person was in\n",
			
 
				+    "# job (string) - the specific job title if applicable, else '\\N'\n",
			
 
				+    "# characters (string) - the name of the character played if applicable, else '\\N'"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "----------\n",
			
 
				+    "\n",
			
 
				+    "###  Clean and merge original data into prepared datasets for experiments"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 6,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "'Initially, the dataset contains 600289 movies.'"
			
 
				+      ]
			
 
				+     },
			
 
				+     "metadata": {},
			
 
				+     "output_type": "display_data"
			
 
				+    },
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "'Quantiles 5% and 95% on runtime minutes yield as delimitation minutes [52.0, 135.0].'"
			
 
				+      ]
			
 
				+     },
			
 
				+     "metadata": {},
			
 
				+     "output_type": "display_data"
			
 
				+    },
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "'After dropping rows of these quantiles, the dataset contains 341225 movies, which is 259064 less movies.'"
			
 
				+      ]
			
 
				+     },
			
 
				+     "metadata": {},
			
 
				+     "output_type": "display_data"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "# Keep only higher quality movies, hence,\n",
			
 
				+    "# - drop rows whose types aren't movies\n",
			
 
				+    "data_film.drop(data_film.index[(data_film[\"titleType\"] != \"movie\")], axis = 0, inplace=True)\n",
			
 
				+    "\n",
			
 
				+    "# - drop rows with *untypical runtime minutes*\n",
			
 
				+    "movies_count_before = data_film.shape[0]\n",
			
 
				+    "quantile = data_film[\"runtimeMinutes\"].quantile([0.05,0.95])\n",
			
 
				+    "\n",
			
 
				+    "data_film = data_film[\n",
			
 
				+    "    (data_film[\"runtimeMinutes\"] >= quantile[0.05]) &\n",
			
 
				+    "    (data_film[\"runtimeMinutes\"] <= quantile[0.95])\n",
			
 
				+    "]\n",
			
 
				+    "\n",
			
 
				+    "movies_count_after = data_film.shape[0]\n",
			
 
				+    "\n",
			
 
				+    "display(f\"Initially, the dataset contains {movies_count_before} movies.\")\n",
			
 
				+    "display(f\"Quantiles 5% and 95% on runtime minutes yield as delimitation minutes {list(quantile)}.\")\n",
			
 
				+    "display(f\"After dropping rows of these quantiles, the dataset contains {movies_count_after} movies, which is {movies_count_before-movies_count_after} less movies.\")"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 7,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# Drop features that for our analysis are either irrelevant or incomplete\n",
			
 
				+    "data_film.drop([\"titleType\", \"primaryTitle\", \"originalTitle\", \"isAdult\", \"endYear\"], axis = 1, inplace=True)\n",
			
 
				+    "data_film.dropna(subset=[\"startYear\", \"runtimeMinutes\"], inplace=True)\n",
			
 
				+    "\n",
			
 
				+    "data_principals.drop([\"ordering\", \"nconst\", \"job\", \"characters\"], axis = 1, inplace=True)\n",
			
 
				+    "\n",
			
 
				+    "# Filter principal cast members for only actors and actresses\n",
			
 
				+    "data_principals = data_principals[\n",
			
 
				+    "    (data_principals[\"category\"] == \"actor\") |\n",
			
 
				+    "    (data_principals[\"category\"] == \"actress\")\n",
			
 
				+    "]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 8,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# Merge movie data\n",
			
 
				+    "data_movie = pd.merge(data_film, data_rating, how=\"inner\", on=\"tconst\")\n",
			
 
				+    "data_movie = pd.merge(data_movie, data_principals, how=\"inner\", on=\"tconst\")"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 9,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# Provide atomic genre data on movies\n",
			
 
				+    "data_movie_genre = data_film.copy()\n",
			
 
				+    "\n",
			
 
				+    "# Drop features that for genres are irrelevant or incomplete\n",
			
 
				+    "data_movie_genre.drop([\"startYear\", \"runtimeMinutes\"], axis=1, inplace=True)\n",
			
 
				+    "data_movie_genre.dropna(subset=[\"genres\"], inplace=True)\n",
			
 
				+    "\n",
			
 
				+    "# Break down genre to atomic data\n",
			
 
				+    "data_movie_genre[\"genres\"] = data_movie_genre[\"genres\"].str.split(\",\")\n",
			
 
				+    "data_movie_genre = data_movie_genre.explode(\"genres\").reset_index(drop=True)\n",
			
 
				+    "\n",
			
 
				+    "# Correct column title to fit atomic data\n",
			
 
				+    "data_movie_genre = data_movie_genre.rename(columns = {\"genres\": \"genre\"})"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Convert integer numbers to integer datatypes"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 10,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_movie[\"startYear\"] = data_movie[\"startYear\"].astype(int)\n",
			
 
				+    "data_movie[\"runtimeMinutes\"] = data_movie[\"runtimeMinutes\"].astype(int)\n",
			
 
				+    "data_movie[\"numVotes\"] = data_movie[\"numVotes\"].astype(int)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Write preprocessed data to files"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 11,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_movie.to_csv(\"dat/data_movie.csv\", index=False)\n",
			
 
				+    "data_movie_genre.to_csv(\"dat/data_movie_genre.csv\", index=False)"
			
 
				+   ]
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python 3",
			
 
				+   "language": "python",
			
 
				+   "name": "python3"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.8.8"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 4
			
 
				+}
			
--- a/exp/exp-002_Share-in-principal-cast-of-actresses-in-all-movies-1980-2020.ipynb
+++ b/exp/exp-002_Share-in-principal-cast-of-actresses-in-all-movies-1980-2020.ipynb
--- a/exp/exp-003_T-Test-Hypothesis-Testing.ipynb
+++ b/exp/exp-003_T-Test-Hypothesis-Testing.ipynb
--- a/exp/exp-004_Beta-Binomial-Hypothesis-Testing.ipynb
+++ b/exp/exp-004_Beta-Binomial-Hypothesis-Testing.ipynb
@@ -0,0 +1,711 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# Data Literacy - Project\n",
			
 
				+    "## Gender Share in Movies\n",
			
 
				+    "#### Tobias Stumpp, Sophia Herrmann"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "## Beta-Binomial Hypothesis Testing"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Parameters"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# Starting year of the period of years covered by the test\n",
			
 
				+    "start_year = 1980\n",
			
 
				+    "# Ending year of the period of years covered by the test\n",
			
 
				+    "end_year = start_year + 40\n",
			
 
				+    "\n",
			
 
				+    "# Split year of the period of years covered by the test that separates\n",
			
 
				+    "# indicative data (>= start_year and < split_year)\n",
			
 
				+    "# from\n",
			
 
				+    "# data to be verified (>= split_year and < end_year).\n",
			
 
				+    "split_year = start_year + 20\n",
			
 
				+    "\n",
			
 
				+    "# Option to ignore movies where the average rating or the number of votes is below the respective 5% quantile.\n",
			
 
				+    "ignore_irrelevant_movies = False"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Meta"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "import numpy as np\n",
			
 
				+    "import pandas as pd\n",
			
 
				+    "import os\n",
			
 
				+    "import matplotlib.pyplot as plt"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "path = '../dat/'\n",
			
 
				+    "os.chdir(path)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Read Data"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "columns = list(pd.read_csv('data_movie.csv', nrows=1))\n",
			
 
				+    "print(columns)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "columns_to_read = [c for c in columns if c != 'genres']\n",
			
 
				+    "\n",
			
 
				+    "data_movie = pd.read_csv('data_movie.csv', usecols = columns_to_read)\n",
			
 
				+    "\n",
			
 
				+    "display(data_movie.info())\n",
			
 
				+    "display(data_movie.head())"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "---"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "#### Provide the option to only include movies that are relevant based on the average rating and number of votes."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_movie[['numVotes','averageRating']].describe()"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "numVotes_split = data_movie['numVotes'].quantile(0.05)\n",
			
 
				+    "numVotes_split"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "averageRating_split = data_movie['averageRating'].quantile(0.05)\n",
			
 
				+    "averageRating_split"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "display(data_movie.shape)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "if ignore_irrelevant_movies:\n",
			
 
				+    "    data_movie = data_movie[(data_movie['numVotes'] > numVotes_split) & (data_movie['averageRating'] > averageRating_split)]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "display(data_movie.shape)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "---"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "#### Only include data on movies from the selected range of years."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "display(data_movie.shape)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_movie = data_movie[(data_movie['startYear'] >= start_year) & (data_movie['startYear'] < end_year)]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "display(data_movie.shape)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Prepare Data"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "##### Add year span as a column"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "year_span_presplit = f\"{start_year}-{split_year}\"\n",
			
 
				+    "year_span_postsplit = f\"{split_year}-{end_year}\"\n",
			
 
				+    "year_span = np.where(data_movie['startYear'] < split_year, year_span_presplit, year_span_postsplit)\n",
			
 
				+    "data_movie.insert(1, 'year_span' , year_span)\n",
			
 
				+    "\n",
			
 
				+    "display(data_movie)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "##### Add counts and proportions on cast members"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_cast_numbers = pd.crosstab(data_movie['tconst'], data_movie['category']).reset_index().rename(columns = {\n",
			
 
				+    "    'actor':'num_actors',\n",
			
 
				+    "    'actress':'num_actresses',\n",
			
 
				+    "})\n",
			
 
				+    "\n",
			
 
				+    "data_cast_proportion = data_movie.groupby(['tconst'])['category'].value_counts(normalize=True).unstack().reset_index().fillna(0).rename(columns = {\n",
			
 
				+    "    'actor':'prop_actors',\n",
			
 
				+    "    'actress':'prop_actresses',\n",
			
 
				+    "})\n",
			
 
				+    "\n",
			
 
				+    "data_cast_gender_stat = pd.merge(data_cast_numbers, data_cast_proportion)\n",
			
 
				+    "data_cast_gender_stat"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_movie_distinct = data_movie.drop(columns=['category']).drop_duplicates(['tconst']).reset_index(drop = True)\n",
			
 
				+    "display(data_movie_distinct)\n",
			
 
				+    "\n",
			
 
				+    "data_movie_gender_stat = pd.merge(data_movie_distinct, data_cast_gender_stat)\n",
			
 
				+    "data_movie_gender_stat.groupby('year_span').apply(display)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "---"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "##### Add counts on proportions of actresses relative to actors"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_movie_gender_stat['num_actresses_>_num_actors'] = (data_movie_gender_stat['num_actresses'] > data_movie_gender_stat['num_actors'])\n",
			
 
				+    "data_movie_gender_stat['num_actresses_=_num_actors'] = (data_movie_gender_stat['num_actresses'] == data_movie_gender_stat['num_actors'])\n",
			
 
				+    "data_movie_gender_stat['num_actresses_<_num_actors'] = (data_movie_gender_stat['num_actresses'] < data_movie_gender_stat['num_actors'])\n",
			
 
				+    "\n",
			
 
				+    "data_movie_gender_stat['num_actresses_=_0'] = (data_movie_gender_stat['num_actresses'] == 0)\n",
			
 
				+    "data_movie_gender_stat['num_actresses_>_0'] = (data_movie_gender_stat['num_actresses'] > 0)\n",
			
 
				+    "\n",
			
 
				+    "data_movie_gender_stat"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_actresses_stat = data_movie_gender_stat.groupby(['year_span','startYear'])[[\n",
			
 
				+    "    'num_actresses_>_num_actors',\n",
			
 
				+    "    'num_actresses_=_num_actors',\n",
			
 
				+    "    'num_actresses_<_num_actors',\n",
			
 
				+    "    'num_actresses_=_0',\n",
			
 
				+    "    'num_actresses_>_0',\n",
			
 
				+    "]].sum().reset_index()\n",
			
 
				+    "\n",
			
 
				+    "data_actresses_stat['num_movies'] = (\n",
			
 
				+    "    data_actresses_stat['num_actresses_>_num_actors'] +\n",
			
 
				+    "    data_actresses_stat['num_actresses_=_num_actors'] +\n",
			
 
				+    "    data_actresses_stat['num_actresses_<_num_actors']\n",
			
 
				+    ")\n",
			
 
				+    "\n",
			
 
				+    "data_actresses_stat"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "---"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "##### Split data into their year spans"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_actresses_stat_timespan_presplit, data_actresses_stat_timespan_postsplit = [\n",
			
 
				+    "    g.reset_index(drop=True) for _, g in data_actresses_stat.groupby(['year_span'])\n",
			
 
				+    "]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "display(data_actresses_stat_timespan_presplit)\n",
			
 
				+    "display(data_actresses_stat_timespan_postsplit)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "display(data_actresses_stat_timespan_presplit.describe())\n",
			
 
				+    "display(data_actresses_stat_timespan_postsplit.describe())"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "---"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "data_actresses_stat_sum = data_actresses_stat.drop(columns=['startYear']).groupby(['year_span']).sum()\n",
			
 
				+    "data_actresses_stat_sum"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "---"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Analyze Data"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "#### Compute p-Values\n",
			
 
				+    "\n",
			
 
				+    "Our goal is to find out whether actresses achieved significantly more movies with *majority shares* or fewer movies with *minority shares* in the principal casts after the split year than before the split year.  \n",
			
 
				+    "We perform a beta-binomial test and explicitly follow the example presented in the lecture and exercise on scores of the German Bundesliga."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "- First, we put a beta-prior on $f_0$ (the majority probability before the split year) which is based on $m_0$ (the number of movies with majority share before the split year) in $n_0$ movies (the number of movies before the split year).\n",
			
 
				+    "\n",
			
 
				+    "- Under the null hypothesis $H_0: f_1 = f_0$, the number of movies with majority share after the split year $m_1$ (given the number of movies after the split year $n_1$) follows a binomial distribution. \n",
			
 
				+    "\n",
			
 
				+    "- Putting these building blocks together, we obtain a [beta-binomial distribution](https://en.wikipedia.org/wiki/Beta-binomial_distribution)\n",
			
 
				+    "\n",
			
 
				+    "    \\begin{equation}\n",
			
 
				+    "    p(m_1 \\vert n_1, m_0, n_0) \n",
			
 
				+    "    = {n_1\\choose m_1} \n",
			
 
				+    "    \\frac{\\mathcal{B}(m_0 + m_1 + 1, (n_0-m_0) + (n_1-m_1) + 1)}\n",
			
 
				+    "    {\\mathcal{B}(m_0 + 1, n_0 - m_0 + 1)}.\n",
			
 
				+    "    \\end{equation}\n",
			
 
				+    "\n",
			
 
				+    "    This tells us the probability of observing $m_1$ movies with *majority shares* after the split year, given the number of movies after the split year $n_1$ and the statistics $m_0$, $n_0$ for the years before."
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "from scipy.stats import betabinom"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "def p_val_won(m_1, n_1, m_0, n_0):\n",
			
 
				+    "    \"\"\"\n",
			
 
				+    "    Compute p-value by summing the evidence p(m_1 | n_1, m_0, n_0) over the \n",
			
 
				+    "    observed number of won movies and 'more extreme' (i.e. smaller) movie counts.\n",
			
 
				+    "    \n",
			
 
				+    "    Parameters\n",
			
 
				+    "    ----------\n",
			
 
				+    "    m_1 : int\n",
			
 
				+    "        Number of won movies after the split year (0 <= m_1 <= n_1)\n",
			
 
				+    "    n_1 : int\n",
			
 
				+    "        Number of movies after the split year (n_1 > 0)\n",
			
 
				+    "    m_0 : int\n",
			
 
				+    "        Number of won movies before the split year (0 <= m_0 <= n_0)\n",
			
 
				+    "    n_0 : int\n",
			
 
				+    "        Number of movies before the split year (n_0 > 0)\n",
			
 
				+    "    \n",
			
 
				+    "    Result\n",
			
 
				+    "    ------\n",
			
 
				+    "    The probability for observing m_1 or less movies.\n",
			
 
				+    "    \"\"\"\n",
			
 
				+    "    return betabinom.cdf(m_1, n_1, m_0 + 1, n_0 - m_0 + 1)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "def p_val_lost(m_1, n_1, m_0, n_0):\n",
			
 
				+    "    \"\"\"\n",
			
 
				+    "    Compute p-value by summing the evidence p(m_1 | n_1, m_0, n_0) over the \n",
			
 
				+    "    observed number of lost movies and 'more extreme' (i.e. larger) movie counts.\n",
			
 
				+    "    \n",
			
 
				+    "    Parameters\n",
			
 
				+    "    ----------\n",
			
 
				+    "    m_1 : int\n",
			
 
				+    "        Number of lost movies after the split year (0 <= m_1 <= n_1)\n",
			
 
				+    "    n_1 : int\n",
			
 
				+    "        Number of movies after the split year (n_1 > 0)\n",
			
 
				+    "    m_0 : int\n",
			
 
				+    "        Number of lost movies before the split year (0 <= m_0 <= n_0)\n",
			
 
				+    "    n_0 : int\n",
			
 
				+    "        Number of movies before the split year (n_0 > 0)\n",
			
 
				+    "    \n",
			
 
				+    "    Result\n",
			
 
				+    "    ------\n",
			
 
				+    "    The probability for observing m_1 or more movies.\n",
			
 
				+    "    \"\"\"\n",
			
 
				+    "    return 1.0 - betabinom.cdf(m_1 - 1, n_1, m_0 + 1, n_0 - m_0 + 1)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "def print_result(p_val):\n",
			
 
				+    "    alpha = 0.05\n",
			
 
				+    "    # Significant results?\n",
			
 
				+    "    print(f\"{'Yes' if (p_val <= alpha) else 'No'}, the result is {'significant' if (p_val <= alpha) else 'insignificant'} because given the pre-split-year data, observing the post-split-year data has a {p_val*100:.2f}% probability.\")"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "#### Are there more movies with a majority of actresses in the principal roles?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "p_val_actresses_in_majority = p_val_lost(\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_>_num_actors'], # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_>_num_actors'],  # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
			
 
				+    ")\n",
			
 
				+    "\n",
			
 
				+    "print_result(p_val_actresses_in_majority)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "#### Are there fewer movies with a minority of actresses in the principal roles?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "p_val_actresses_in_minority = p_val_won(\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_<_num_actors'], # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_<_num_actors'],  # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
			
 
				+    ")\n",
			
 
				+    "\n",
			
 
				+    "print_result(p_val_actresses_in_minority)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "#### Are there fewer movies with a majority of actresses in the principal roles?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "p_val_actresses_in_majority = p_val_won(\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_>_num_actors'], # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_>_num_actors'],  # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
			
 
				+    ")\n",
			
 
				+    "\n",
			
 
				+    "print_result(p_val_actresses_in_majority)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "#### Are there more movies with a minority of actresses in the principal roles?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "p_val_actresses_in_minority = p_val_lost(\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_<_num_actors'], # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_<_num_actors'],  # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
			
 
				+    ")\n",
			
 
				+    "\n",
			
 
				+    "print_result(p_val_actresses_in_minority)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "---"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "#### Are there fewer movies with zero actresses in the principal roles?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "p_val_actresses_eq_zero = p_val_won(\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_=_0'], # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_=_0'],  # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
			
 
				+    ")\n",
			
 
				+    "\n",
			
 
				+    "print_result(p_val_actresses_eq_zero)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "#### Are there more movies with more than zero actresses in the principal roles?"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "p_val_actresses_gt_zero = p_val_lost(\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_>_0'], # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_>_0'],  # <---\n",
			
 
				+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
			
 
				+    ")\n",
			
 
				+    "\n",
			
 
				+    "print_result(p_val_actresses_gt_zero)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "### Results"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "In summary, the series of beta-binomial tests shows that, after the split year, there are more films with a majority of actresses and fewer films with a minority of actresses in the lead roles.  \n",
			
 
				+    "The remaining tests did not show significance.\n",
			
 
				+    "\n",
			
 
				+    "We interpret the results overall as an indicator of improvement in the proportion of principal actresses.\n",
			
 
				+    "\n",
			
 
				+    "Note: It is difficult for us to evaluate how reliable these results are. On the one hand, we learned about the test method on a closely related example in the lecture and we are convinced that we can apply this model to this movie cast data. On the other hand, we don't know how meaningful this result is for the ratio of actors and actresses, despite the strikingly low p-values."
			
 
				+   ]
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python 3",
			
 
				+   "language": "python",
			
 
				+   "name": "python3"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.8.8"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 4
			
 
				+}
			
--- a/exp/exp-005_Relationship-Rating-and-Share-Actresses-on-principal-cast.ipynb
+++ b/exp/exp-005_Relationship-Rating-and-Share-Actresses-on-principal-cast.ipynb
		`@@ -0,0 +1 @@`
	1	`+dat/*.tsv.gz filter=lfs diff=lfs merge=lfs -text`
	2	`+oid sha256:f9454fae364a7848af28d763c84963a6fff6ff06d936e4f0fd8174834ed01441`
	3	`+size 151455252`
	2	`+oid sha256:7516751fcd51991d13adc04f5b24291d0b46e65475dd4183df1ad4dee7ee273f`
	3	`+size 386712793`
	2	`+oid sha256:7891c9b416445df2c72abe4bedf436abfa760623f8f6f9ad9a37b44bd40c45cb`
	3	`+size 6035379`