瀏覽代碼

Publish stash of the working repository

Tobias Stumpp 3 年之前
當前提交
d29bcf9353

+ 1
- 0
.gitattributes 查看文件

@@ -0,0 +1 @@
1
+dat/*.tsv.gz filter=lfs diff=lfs merge=lfs -text

+ 487
- 0
.gitignore 查看文件

@@ -0,0 +1,487 @@
1
+
2
+# Custom ignore rules
3
+
4
+dat/*
5
+!dat/*.tsv.gz
6
+doc/projectregistration2022/projectregistration.pdf
7
+
8
+
9
+# Created by https://www.toptal.com/developers/gitignore/api/latex,jupyternotebooks,python
10
+# Edit at https://www.toptal.com/developers/gitignore?templates=latex,jupyternotebooks,python
11
+
12
+### JupyterNotebooks ###
13
+# gitignore template for Jupyter Notebooks
14
+# website: http://jupyter.org/
15
+
16
+.ipynb_checkpoints
17
+*/.ipynb_checkpoints/*
18
+
19
+# IPython
20
+profile_default/
21
+ipython_config.py
22
+
23
+# Remove previous ipynb_checkpoints
24
+#   git rm -r .ipynb_checkpoints/
25
+
26
+### LaTeX ###
27
+## Core latex/pdflatex auxiliary files:
28
+*.aux
29
+*.lof
30
+*.log
31
+*.lot
32
+*.fls
33
+*.out
34
+*.toc
35
+*.fmt
36
+*.fot
37
+*.cb
38
+*.cb2
39
+.*.lb
40
+
41
+## Intermediate documents:
42
+*.dvi
43
+*.xdv
44
+*-converted-to.*
45
+# these rules might exclude image files for figures etc.
46
+# *.ps
47
+# *.eps
48
+# *.pdf
49
+
50
+## Generated if empty string is given at "Please type another file name for output:"
51
+.pdf
52
+
53
+## Bibliography auxiliary files (bibtex/biblatex/biber):
54
+*.bbl
55
+*.bcf
56
+*.blg
57
+*-blx.aux
58
+*-blx.bib
59
+*.run.xml
60
+
61
+## Build tool auxiliary files:
62
+*.fdb_latexmk
63
+*.synctex
64
+*.synctex(busy)
65
+*.synctex.gz
66
+*.synctex.gz(busy)
67
+*.pdfsync
68
+
69
+## Build tool directories for auxiliary files
70
+# latexrun
71
+latex.out/
72
+
73
+## Auxiliary and intermediate files from other packages:
74
+# algorithms
75
+*.alg
76
+*.loa
77
+
78
+# achemso
79
+acs-*.bib
80
+
81
+# amsthm
82
+*.thm
83
+
84
+# beamer
85
+*.nav
86
+*.pre
87
+*.snm
88
+*.vrb
89
+
90
+# changes
91
+*.soc
92
+
93
+# comment
94
+*.cut
95
+
96
+# cprotect
97
+*.cpt
98
+
99
+# elsarticle (documentclass of Elsevier journals)
100
+*.spl
101
+
102
+# endnotes
103
+*.ent
104
+
105
+# fixme
106
+*.lox
107
+
108
+# feynmf/feynmp
109
+*.mf
110
+*.mp
111
+*.t[1-9]
112
+*.t[1-9][0-9]
113
+*.tfm
114
+
115
+#(r)(e)ledmac/(r)(e)ledpar
116
+*.end
117
+*.?end
118
+*.[1-9]
119
+*.[1-9][0-9]
120
+*.[1-9][0-9][0-9]
121
+*.[1-9]R
122
+*.[1-9][0-9]R
123
+*.[1-9][0-9][0-9]R
124
+*.eledsec[1-9]
125
+*.eledsec[1-9]R
126
+*.eledsec[1-9][0-9]
127
+*.eledsec[1-9][0-9]R
128
+*.eledsec[1-9][0-9][0-9]
129
+*.eledsec[1-9][0-9][0-9]R
130
+
131
+# glossaries
132
+*.acn
133
+*.acr
134
+*.glg
135
+*.glo
136
+*.gls
137
+*.glsdefs
138
+*.lzo
139
+*.lzs
140
+*.slg
141
+*.slo
142
+*.sls
143
+
144
+# uncomment this for glossaries-extra (will ignore makeindex's style files!)
145
+# *.ist
146
+
147
+# gnuplot
148
+*.gnuplot
149
+*.table
150
+
151
+# gnuplottex
152
+*-gnuplottex-*
153
+
154
+# gregoriotex
155
+*.gaux
156
+*.glog
157
+*.gtex
158
+
159
+# htlatex
160
+*.4ct
161
+*.4tc
162
+*.idv
163
+*.lg
164
+*.trc
165
+*.xref
166
+
167
+# hyperref
168
+*.brf
169
+
170
+# knitr
171
+*-concordance.tex
172
+# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
173
+# *.tikz
174
+*-tikzDictionary
175
+
176
+# listings
177
+*.lol
178
+
179
+# luatexja-ruby
180
+*.ltjruby
181
+
182
+# makeidx
183
+*.idx
184
+*.ilg
185
+*.ind
186
+
187
+# minitoc
188
+*.maf
189
+*.mlf
190
+*.mlt
191
+*.mtc[0-9]*
192
+*.slf[0-9]*
193
+*.slt[0-9]*
194
+*.stc[0-9]*
195
+
196
+# minted
197
+_minted*
198
+*.pyg
199
+
200
+# morewrites
201
+*.mw
202
+
203
+# newpax
204
+*.newpax
205
+
206
+# nomencl
207
+*.nlg
208
+*.nlo
209
+*.nls
210
+
211
+# pax
212
+*.pax
213
+
214
+# pdfpcnotes
215
+*.pdfpc
216
+
217
+# sagetex
218
+*.sagetex.sage
219
+*.sagetex.py
220
+*.sagetex.scmd
221
+
222
+# scrwfile
223
+*.wrt
224
+
225
+# svg
226
+svg-inkscape/
227
+
228
+# sympy
229
+*.sout
230
+*.sympy
231
+sympy-plots-for-*.tex/
232
+
233
+# pdfcomment
234
+*.upa
235
+*.upb
236
+
237
+# pythontex
238
+*.pytxcode
239
+pythontex-files-*/
240
+
241
+# tcolorbox
242
+*.listing
243
+
244
+# thmtools
245
+*.loe
246
+
247
+# TikZ & PGF
248
+*.dpth
249
+*.md5
250
+*.auxlock
251
+
252
+# titletoc
253
+*.ptc
254
+
255
+# todonotes
256
+*.tdo
257
+
258
+# vhistory
259
+*.hst
260
+*.ver
261
+
262
+# easy-todo
263
+*.lod
264
+
265
+# xcolor
266
+*.xcp
267
+
268
+# xmpincl
269
+*.xmpi
270
+
271
+# xindy
272
+*.xdy
273
+
274
+# xypic precompiled matrices and outlines
275
+*.xyc
276
+*.xyd
277
+
278
+# endfloat
279
+*.ttt
280
+*.fff
281
+
282
+# Latexian
283
+TSWLatexianTemp*
284
+
285
+## Editors:
286
+# WinEdt
287
+*.bak
288
+*.sav
289
+
290
+# Texpad
291
+.texpadtmp
292
+
293
+# LyX
294
+*.lyx~
295
+
296
+# Kile
297
+*.backup
298
+
299
+# gummi
300
+.*.swp
301
+
302
+# KBibTeX
303
+*~[0-9]*
304
+
305
+# TeXnicCenter
306
+*.tps
307
+
308
+# auto folder when using emacs and auctex
309
+./auto/*
310
+*.el
311
+
312
+# expex forward references with \gathertags
313
+*-tags.tex
314
+
315
+# standalone packages
316
+*.sta
317
+
318
+# Makeindex log files
319
+*.lpz
320
+
321
+# xwatermark package
322
+*.xwm
323
+
324
+# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
325
+# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
326
+# Uncomment the next line to have this generated file ignored.
327
+#*Notes.bib
328
+
329
+### LaTeX Patch ###
330
+# LIPIcs / OASIcs
331
+*.vtc
332
+
333
+# glossaries
334
+*.glstex
335
+
336
+### Python ###
337
+# Byte-compiled / optimized / DLL files
338
+__pycache__/
339
+*.py[cod]
340
+*$py.class
341
+
342
+# C extensions
343
+*.so
344
+
345
+# Distribution / packaging
346
+.Python
347
+build/
348
+develop-eggs/
349
+dist/
350
+downloads/
351
+eggs/
352
+.eggs/
353
+lib/
354
+lib64/
355
+parts/
356
+sdist/
357
+var/
358
+wheels/
359
+share/python-wheels/
360
+*.egg-info/
361
+.installed.cfg
362
+*.egg
363
+MANIFEST
364
+
365
+# PyInstaller
366
+#  Usually these files are written by a python script from a template
367
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
368
+*.manifest
369
+*.spec
370
+
371
+# Installer logs
372
+pip-log.txt
373
+pip-delete-this-directory.txt
374
+
375
+# Unit test / coverage reports
376
+htmlcov/
377
+.tox/
378
+.nox/
379
+.coverage
380
+.coverage.*
381
+.cache
382
+nosetests.xml
383
+coverage.xml
384
+*.cover
385
+*.py,cover
386
+.hypothesis/
387
+.pytest_cache/
388
+cover/
389
+
390
+# Translations
391
+*.mo
392
+*.pot
393
+
394
+# Django stuff:
395
+local_settings.py
396
+db.sqlite3
397
+db.sqlite3-journal
398
+
399
+# Flask stuff:
400
+instance/
401
+.webassets-cache
402
+
403
+# Scrapy stuff:
404
+.scrapy
405
+
406
+# Sphinx documentation
407
+docs/_build/
408
+
409
+# PyBuilder
410
+.pybuilder/
411
+target/
412
+
413
+# Jupyter Notebook
414
+
415
+# IPython
416
+
417
+# pyenv
418
+#   For a library or package, you might want to ignore these files since the code is
419
+#   intended to run in multiple environments; otherwise, check them in:
420
+# .python-version
421
+
422
+# pipenv
423
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
424
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
425
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
426
+#   install all needed dependencies.
427
+#Pipfile.lock
428
+
429
+# poetry
430
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
431
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
432
+#   commonly ignored for libraries.
433
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
434
+#poetry.lock
435
+
436
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
437
+__pypackages__/
438
+
439
+# Celery stuff
440
+celerybeat-schedule
441
+celerybeat.pid
442
+
443
+# SageMath parsed files
444
+*.sage.py
445
+
446
+# Environments
447
+.env
448
+.venv
449
+env/
450
+venv/
451
+ENV/
452
+env.bak/
453
+venv.bak/
454
+
455
+# Spyder project settings
456
+.spyderproject
457
+.spyproject
458
+
459
+# Rope project settings
460
+.ropeproject
461
+
462
+# mkdocs documentation
463
+/site
464
+
465
+# mypy
466
+.mypy_cache/
467
+.dmypy.json
468
+dmypy.json
469
+
470
+# Pyre type checker
471
+.pyre/
472
+
473
+# pytype static type analyzer
474
+.pytype/
475
+
476
+# Cython debug symbols
477
+cython_debug/
478
+
479
+# PyCharm
480
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
481
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
482
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
483
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
484
+#.idea/
485
+
486
+# End of https://www.toptal.com/developers/gitignore/api/latex,jupyternotebooks,python
487
+

二進制
dat/title.basics.tsv.gz (儲存到 Git LFS) 查看文件

2
+oid sha256:f9454fae364a7848af28d763c84963a6fff6ff06d936e4f0fd8174834ed01441
3
+size 151455252

二進制
dat/title.principals.tsv.gz (儲存到 Git LFS) 查看文件

2
+oid sha256:7516751fcd51991d13adc04f5b24291d0b46e65475dd4183df1ad4dee7ee273f
3
+size 386712793

二進制
dat/title.ratings.tsv.gz (儲存到 Git LFS) 查看文件

2
+oid sha256:7891c9b416445df2c72abe4bedf436abfa760623f8f6f9ad9a37b44bd40c45cb
3
+size 6035379

+ 377
- 0
doc/projectregistration2022/neurips_2021.sty 查看文件

@@ -0,0 +1,377 @@
1
+% partial rewrite of the LaTeX2e package for submissions to the
2
+% Conference on Neural Information Processing Systems (NeurIPS):
3
+%
4
+% - uses more LaTeX conventions
5
+% - line numbers at submission time replaced with aligned numbers from
6
+%   lineno package
7
+% - \nipsfinalcopy replaced with [final] package option
8
+% - automatically loads times package for authors
9
+% - loads natbib automatically; this can be suppressed with the
10
+%   [nonatbib] package option
11
+% - adds foot line to first page identifying the conference
12
+% - adds preprint option for submission to e.g. arXiv
13
+% - conference acronym modified
14
+%
15
+% Roman Garnett (garnett@wustl.edu) and the many authors of
16
+% nips15submit_e.sty, including MK and drstrip@sandia
17
+%
18
+% last revision: March 2021
19
+
20
+\NeedsTeXFormat{LaTeX2e}
21
+\ProvidesPackage{neurips_2021}[2021/03/31 NeurIPS 2021 submission/camera-ready style file]
22
+
23
+% declare final option, which creates camera-ready copy
24
+\newif\if@neuripsfinal\@neuripsfinalfalse
25
+\DeclareOption{final}{
26
+  \@neuripsfinaltrue
27
+}
28
+
29
+% declare nonatbib option, which does not load natbib in case of
30
+% package clash (users can pass options to natbib via
31
+% \PassOptionsToPackage)
32
+\newif\if@natbib\@natbibtrue
33
+\DeclareOption{nonatbib}{
34
+  \@natbibfalse
35
+}
36
+
37
+% declare preprint option, which creates a preprint version ready for
38
+% upload to, e.g., arXiv
39
+\newif\if@preprint\@preprintfalse
40
+\DeclareOption{preprint}{
41
+  \@preprinttrue
42
+}
43
+
44
+\ProcessOptions\relax
45
+
46
+% determine whether this is an anonymized submission
47
+\newif\if@submission\@submissiontrue
48
+\if@neuripsfinal\@submissionfalse\fi
49
+\if@preprint\@submissionfalse\fi
50
+
51
+% fonts
52
+\renewcommand{\rmdefault}{ptm}
53
+\renewcommand{\sfdefault}{phv}
54
+
55
+% change this every year for notice string at bottom
56
+\newcommand{\@neuripsordinal}{35th}
57
+\newcommand{\@neuripsyear}{2021}
58
+\newcommand{\@neuripslocation}{virtual}
59
+
60
+% acknowledgments
61
+\usepackage{environ}
62
+\newcommand{\acksection}{\section*{Acknowledgments and Disclosure of Funding}}
63
+\NewEnviron{ack}{%
64
+  \acksection
65
+  \BODY
66
+}
67
+
68
+% handle tweaks for camera-ready copy vs. submission copy
69
+\if@preprint
70
+  \newcommand{\@noticestring}{%
71
+    Project Report for \emph{Data Literacy} 2021/22
72
+  }
73
+\else
74
+  \if@neuripsfinal
75
+    \newcommand{\@noticestring}{%
76
+      \@neuripsordinal\/ Conference on Neural Information Processing Systems
77
+      (NeurIPS \@neuripsyear).%, \@neuripslocation.%
78
+    }
79
+  \else
80
+    \newcommand{\@noticestring}{%
81
+      Submitted to \@neuripsordinal\/ Conference on Neural Information
82
+      Processing Systems (NeurIPS \@neuripsyear). Do not distribute.%
83
+    }
84
+
85
+    % hide the acknowledgements
86
+    \NewEnviron{hide}{}
87
+    \let\ack\hide
88
+    \let\endack\endhide
89
+
90
+    % line numbers for submission
91
+    \RequirePackage{lineno}
92
+    \linenumbers
93
+
94
+    % fix incompatibilities between lineno and amsmath, if required, by
95
+    % transparently wrapping linenomath environments around amsmath
96
+    % environments
97
+    \AtBeginDocument{%
98
+      \@ifpackageloaded{amsmath}{%
99
+        \newcommand*\patchAmsMathEnvironmentForLineno[1]{%
100
+          \expandafter\let\csname old#1\expandafter\endcsname\csname #1\endcsname
101
+          \expandafter\let\csname oldend#1\expandafter\endcsname\csname end#1\endcsname
102
+          \renewenvironment{#1}%
103
+                           {\linenomath\csname old#1\endcsname}%
104
+                           {\csname oldend#1\endcsname\endlinenomath}%
105
+        }%
106
+        \newcommand*\patchBothAmsMathEnvironmentsForLineno[1]{%
107
+          \patchAmsMathEnvironmentForLineno{#1}%
108
+          \patchAmsMathEnvironmentForLineno{#1*}%
109
+        }%
110
+        \patchBothAmsMathEnvironmentsForLineno{equation}%
111
+        \patchBothAmsMathEnvironmentsForLineno{align}%
112
+        \patchBothAmsMathEnvironmentsForLineno{flalign}%
113
+        \patchBothAmsMathEnvironmentsForLineno{alignat}%
114
+        \patchBothAmsMathEnvironmentsForLineno{gather}%
115
+        \patchBothAmsMathEnvironmentsForLineno{multline}%
116
+      }{}
117
+    }
118
+  \fi
119
+\fi
120
+
121
+% load natbib unless told otherwise
122
+\if@natbib
123
+  \RequirePackage{natbib}
124
+\fi
125
+
126
+% set page geometry
127
+\usepackage[verbose=true,letterpaper]{geometry}
128
+\AtBeginDocument{
129
+  \newgeometry{
130
+    textheight=9in,
131
+    textwidth=5.5in,
132
+    top=1in,
133
+    headheight=12pt,
134
+    headsep=25pt,
135
+    footskip=30pt
136
+  }
137
+  \@ifpackageloaded{fullpage}
138
+    {\PackageWarning{neurips_2021}{fullpage package not allowed! Overwriting formatting.}}
139
+    {}
140
+}
141
+
142
+\widowpenalty=10000
143
+\clubpenalty=10000
144
+\flushbottom
145
+\sloppy
146
+
147
+% font sizes with reduced leading
148
+\renewcommand{\normalsize}{%
149
+  \@setfontsize\normalsize\@xpt\@xipt
150
+  \abovedisplayskip      7\p@ \@plus 2\p@ \@minus 5\p@
151
+  \abovedisplayshortskip \z@ \@plus 3\p@
152
+  \belowdisplayskip      \abovedisplayskip
153
+  \belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@
154
+}
155
+\normalsize
156
+\renewcommand{\small}{%
157
+  \@setfontsize\small\@ixpt\@xpt
158
+  \abovedisplayskip      6\p@ \@plus 1.5\p@ \@minus 4\p@
159
+  \abovedisplayshortskip \z@  \@plus 2\p@
160
+  \belowdisplayskip      \abovedisplayskip
161
+  \belowdisplayshortskip 3\p@ \@plus 2\p@   \@minus 2\p@
162
+}
163
+\renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt}
164
+\renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt}
165
+\renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt}
166
+\renewcommand{\large}{\@setfontsize\large\@xiipt{14}}
167
+\renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}}
168
+\renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}}
169
+\renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}}
170
+\renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}}
171
+
172
+% sections with less space
173
+\providecommand{\section}{}
174
+\renewcommand{\section}{%
175
+  \@startsection{section}{1}{\z@}%
176
+                {-2.0ex \@plus -0.5ex \@minus -0.2ex}%
177
+                { 1.5ex \@plus  0.3ex \@minus  0.2ex}%
178
+                {\large\bf\raggedright}%
179
+}
180
+\providecommand{\subsection}{}
181
+\renewcommand{\subsection}{%
182
+  \@startsection{subsection}{2}{\z@}%
183
+                {-1.8ex \@plus -0.5ex \@minus -0.2ex}%
184
+                { 0.8ex \@plus  0.2ex}%
185
+                {\normalsize\bf\raggedright}%
186
+}
187
+\providecommand{\subsubsection}{}
188
+\renewcommand{\subsubsection}{%
189
+  \@startsection{subsubsection}{3}{\z@}%
190
+                {-1.5ex \@plus -0.5ex \@minus -0.2ex}%
191
+                { 0.5ex \@plus  0.2ex}%
192
+                {\normalsize\bf\raggedright}%
193
+}
194
+\providecommand{\paragraph}{}
195
+\renewcommand{\paragraph}{%
196
+  \@startsection{paragraph}{4}{\z@}%
197
+                {1.5ex \@plus 0.5ex \@minus 0.2ex}%
198
+                {-1em}%
199
+                {\normalsize\bf}%
200
+}
201
+\providecommand{\subparagraph}{}
202
+\renewcommand{\subparagraph}{%
203
+  \@startsection{subparagraph}{5}{\z@}%
204
+                {1.5ex \@plus 0.5ex \@minus 0.2ex}%
205
+                {-1em}%
206
+                {\normalsize\bf}%
207
+}
208
+\providecommand{\subsubsubsection}{}
209
+\renewcommand{\subsubsubsection}{%
210
+  \vskip5pt{\noindent\normalsize\rm\raggedright}%
211
+}
212
+
213
+% float placement
214
+\renewcommand{\topfraction      }{0.85}
215
+\renewcommand{\bottomfraction   }{0.4}
216
+\renewcommand{\textfraction     }{0.1}
217
+\renewcommand{\floatpagefraction}{0.7}
218
+
219
+\newlength{\@neuripsabovecaptionskip}\setlength{\@neuripsabovecaptionskip}{7\p@}
220
+\newlength{\@neuripsbelowcaptionskip}\setlength{\@neuripsbelowcaptionskip}{\z@}
221
+
222
+\setlength{\abovecaptionskip}{\@neuripsabovecaptionskip}
223
+\setlength{\belowcaptionskip}{\@neuripsbelowcaptionskip}
224
+
225
+% swap above/belowcaptionskip lengths for tables
226
+\renewenvironment{table}
227
+  {\setlength{\abovecaptionskip}{\@neuripsbelowcaptionskip}%
228
+   \setlength{\belowcaptionskip}{\@neuripsabovecaptionskip}%
229
+   \@float{table}}
230
+  {\end@float}
231
+
232
+% footnote formatting
233
+\setlength{\footnotesep }{6.65\p@}
234
+\setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@}
235
+\renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@}
236
+\setcounter{footnote}{0}
237
+
238
+% paragraph formatting
239
+\setlength{\parindent}{\z@}
240
+\setlength{\parskip  }{5.5\p@}
241
+
242
+% list formatting
243
+\setlength{\topsep       }{4\p@ \@plus 1\p@   \@minus 2\p@}
244
+\setlength{\partopsep    }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@}
245
+\setlength{\itemsep      }{2\p@ \@plus 1\p@   \@minus 0.5\p@}
246
+\setlength{\parsep       }{2\p@ \@plus 1\p@   \@minus 0.5\p@}
247
+\setlength{\leftmargin   }{3pc}
248
+\setlength{\leftmargini  }{\leftmargin}
249
+\setlength{\leftmarginii }{2em}
250
+\setlength{\leftmarginiii}{1.5em}
251
+\setlength{\leftmarginiv }{1.0em}
252
+\setlength{\leftmarginv  }{0.5em}
253
+\def\@listi  {\leftmargin\leftmargini}
254
+\def\@listii {\leftmargin\leftmarginii
255
+              \labelwidth\leftmarginii
256
+              \advance\labelwidth-\labelsep
257
+              \topsep  2\p@ \@plus 1\p@    \@minus 0.5\p@
258
+              \parsep  1\p@ \@plus 0.5\p@ \@minus 0.5\p@
259
+              \itemsep \parsep}
260
+\def\@listiii{\leftmargin\leftmarginiii
261
+              \labelwidth\leftmarginiii
262
+              \advance\labelwidth-\labelsep
263
+              \topsep    1\p@ \@plus 0.5\p@ \@minus 0.5\p@
264
+              \parsep    \z@
265
+              \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@
266
+              \itemsep \topsep}
267
+\def\@listiv {\leftmargin\leftmarginiv
268
+              \labelwidth\leftmarginiv
269
+              \advance\labelwidth-\labelsep}
270
+\def\@listv  {\leftmargin\leftmarginv
271
+              \labelwidth\leftmarginv
272
+              \advance\labelwidth-\labelsep}
273
+\def\@listvi {\leftmargin\leftmarginvi
274
+              \labelwidth\leftmarginvi
275
+              \advance\labelwidth-\labelsep}
276
+
277
+% create title
278
+\providecommand{\maketitle}{}
279
+\renewcommand{\maketitle}{%
280
+  \par
281
+  \begingroup
282
+    \renewcommand{\thefootnote}{\fnsymbol{footnote}}
283
+    % for perfect author name centering
284
+    \renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}}
285
+    % The footnote-mark was overlapping the footnote-text,
286
+    % added the following to fix this problem               (MK)
287
+    \long\def\@makefntext##1{%
288
+      \parindent 1em\noindent
289
+      \hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1
290
+    }
291
+    \thispagestyle{empty}
292
+    \@maketitle
293
+    \@thanks
294
+    \@notice
295
+  \endgroup
296
+  \let\maketitle\relax
297
+  \let\thanks\relax
298
+}
299
+
300
+% rules for title box at top of first page
301
+\newcommand{\@toptitlebar}{
302
+  \hrule height 4\p@
303
+  \vskip 0.25in
304
+  \vskip -\parskip%
305
+}
306
+\newcommand{\@bottomtitlebar}{
307
+  \vskip 0.29in
308
+  \vskip -\parskip
309
+  \hrule height 1\p@
310
+  \vskip 0.09in%
311
+}
312
+
313
+% create title (includes both anonymized and non-anonymized versions)
314
+\providecommand{\@maketitle}{}
315
+\renewcommand{\@maketitle}{%
316
+  \vbox{%
317
+    \hsize\textwidth
318
+    \linewidth\hsize
319
+    \vskip 0.1in
320
+    \@toptitlebar
321
+    \centering
322
+    {\LARGE\bf \@title\par}
323
+    \@bottomtitlebar
324
+    \if@submission
325
+      \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}
326
+        Anonymous Author(s) \\
327
+        Affiliation \\
328
+        Address \\
329
+        \texttt{email} \\
330
+      \end{tabular}%
331
+    \else
332
+      \def\And{%
333
+        \end{tabular}\hfil\linebreak[0]\hfil%
334
+        \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
335
+      }
336
+      \def\AND{%
337
+        \end{tabular}\hfil\linebreak[4]\hfil%
338
+        \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
339
+      }
340
+      \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}%
341
+    \fi
342
+    \vskip 0.3in \@minus 0.1in
343
+  }
344
+}
345
+
346
+% add conference notice to bottom of first page
347
+\newcommand{\ftype@noticebox}{8}
348
+\newcommand{\@notice}{%
349
+  % give a bit of extra room back to authors on first page
350
+  \enlargethispage{2\baselineskip}%
351
+  \@float{noticebox}[b]%
352
+    \footnotesize\@noticestring%
353
+  \end@float%
354
+}
355
+
356
+% abstract styling
357
+\renewenvironment{abstract}%
358
+{%
359
+  \vskip 0.075in%
360
+  \centerline%
361
+  {\large\bf Abstract}%
362
+  \vspace{0.5ex}%
363
+  \begin{quote}%
364
+}
365
+{
366
+  \par%
367
+  \end{quote}%
368
+  \vskip 1ex%
369
+}
370
+
371
+% For the paper checklist
372
+\newcommand{\answerYes}[1][]{\textcolor{blue}{[Yes] #1}}
373
+\newcommand{\answerNo}[1][]{\textcolor{orange}{[No] #1}}
374
+\newcommand{\answerNA}[1][]{\textcolor{gray}{[N/A] #1}}
375
+\newcommand{\answerTODO}[1][]{\textcolor{red}{\bf [TODO]}}
376
+
377
+\endinput

+ 491
- 0
doc/projectregistration2022/neurips_2021.tex 查看文件

@@ -0,0 +1,491 @@
1
+\documentclass{article}
2
+
3
+% if you need to pass options to natbib, use, e.g.:
4
+%     \PassOptionsToPackage{numbers, compress}{natbib}
5
+% before loading neurips_2021
6
+
7
+% preprint (non-anonymized) build; drop the option for an anonymized submission
8
+\usepackage[preprint]{neurips_2021}
9
+
10
+% to compile a preprint version, e.g., for submission to arXiv, add the
11
+% [preprint] option:
12
+%     \usepackage[preprint]{neurips_2021}
13
+
14
+% to compile a camera-ready version, add the [final] option, e.g.:
15
+%     \usepackage[final]{neurips_2021}
16
+
17
+% to avoid loading the natbib package, add option nonatbib:
18
+%    \usepackage[nonatbib]{neurips_2021}
19
+
20
+\usepackage[utf8]{inputenc} % allow utf-8 input
21
+\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
22
+\usepackage{hyperref}       % hyperlinks
23
+\usepackage{url}            % simple URL typesetting
24
+\usepackage{booktabs}       % professional-quality tables
25
+\usepackage{amsfonts}       % blackboard math symbols
26
+\usepackage{nicefrac}       % compact symbols for 1/2, etc.
27
+\usepackage{microtype}      % microtypography
28
+\usepackage{xcolor}         % colors
29
+
30
+\title{Formatting Instructions For NeurIPS 2021}
31
+
32
+% The \author macro works with any number of authors. There are two commands
33
+% used to separate the names and addresses of multiple authors: \And and \AND.
34
+%
35
+% Using \And between authors leaves it to LaTeX to determine where to break the
36
+% lines. Using \AND forces a line break at that point. So, if LaTeX puts 3 of 4
37
+% authors names on the first line, and the last on the second line, try using
38
+% \AND instead of \And before the third author name.
39
+
40
+\author{%
41
+  David S.~Hippocampus\thanks{Use footnote for providing further information
42
+    about author (webpage, alternative address)---\emph{not} for acknowledging
43
+    funding agencies.} \\
44
+  Department of Computer Science\\
45
+  Cranberry-Lemon University\\
46
+  Pittsburgh, PA 15213 \\
47
+  \texttt{hippo@cs.cranberry-lemon.edu} \\
48
+  % examples of more authors
49
+  % \And
50
+  % Coauthor \\
51
+  % Affiliation \\
52
+  % Address \\
53
+  % \texttt{email} \\
54
+  % \AND
55
+  % Coauthor \\
56
+  % Affiliation \\
57
+  % Address \\
58
+  % \texttt{email} \\
59
+  % \And
60
+  % Coauthor \\
61
+  % Affiliation \\
62
+  % Address \\
63
+  % \texttt{email} \\
64
+  % \And
65
+  % Coauthor \\
66
+  % Affiliation \\
67
+  % Address \\
68
+  % \texttt{email} \\
69
+}
70
+
71
+\begin{document}
72
+
73
+\maketitle
74
+
75
+\begin{abstract}
76
+  The abstract paragraph should be indented \nicefrac{1}{2}~inch (3~picas) on
77
+  both the left- and right-hand margins. Use 10~point type, with a vertical
78
+  spacing (leading) of 11~points.  The word \textbf{Abstract} must be centered,
79
+  bold, and in point size 12. Two line spaces precede the abstract. The abstract
80
+  must be limited to one paragraph.
81
+\end{abstract}
82
+
83
+\section{Submission of papers to NeurIPS 2021}
84
+
85
+Please read the instructions below carefully and follow them faithfully.
86
+
87
+\subsection{Style}
88
+
89
+Papers to be submitted to NeurIPS 2021 must be prepared according to the
90
+instructions presented here. Papers may only be up to {\bf nine} pages long,
91
+including figures. Additional pages \emph{containing only acknowledgments and
92
+references} are allowed. Papers that exceed the page limit will not be
93
+reviewed, or in any other way considered for presentation at the conference.
94
+
95
+The margins in 2021 are the same as those in 2007, which allow for $\sim$$15\%$
96
+more words in the paper compared to earlier years.
97
+
98
+Authors are required to use the NeurIPS \LaTeX{} style files obtainable at the
99
+NeurIPS website as indicated below. Please make sure you use the current files
100
+and not previous versions. Tweaking the style files may be grounds for
101
+rejection.
102
+
103
+\subsection{Retrieval of style files}
104
+
105
+The style files for NeurIPS and other conference information are available on
106
+the World Wide Web at
107
+\begin{center}
108
+  \url{http://www.neurips.cc/}
109
+\end{center}
110
+The file \verb+neurips_2021.pdf+ contains these instructions and illustrates the
111
+various formatting requirements your NeurIPS paper must satisfy.
112
+
113
+The only supported style file for NeurIPS 2021 is \verb+neurips_2021.sty+,
114
+rewritten for \LaTeXe{}.  \textbf{Previous style files for \LaTeX{} 2.09,
115
+  Microsoft Word, and RTF are no longer supported!}
116
+
117
+The \LaTeX{} style file contains three optional arguments: \verb+final+, which
118
+creates a camera-ready copy, \verb+preprint+, which creates a preprint for
119
+submission to, e.g., arXiv, and \verb+nonatbib+, which will not load the
120
+\verb+natbib+ package for you in case of package clash.
121
+
122
+\paragraph{Preprint option}
123
+If you wish to post a preprint of your work online, e.g., on arXiv, using the
124
+NeurIPS style, please use the \verb+preprint+ option. This will create a
125
+nonanonymized version of your work with the text ``Project Report for \emph{Data Literacy} 2021/22''
126
+in the footer. This version may be distributed as you see fit. Please \textbf{do
127
+  not} use the \verb+final+ option, which should \textbf{only} be used for
128
+papers accepted to NeurIPS.
129
+
130
+At submission time, please omit the \verb+final+ and \verb+preprint+
131
+options. This will anonymize your submission and add line numbers to aid
132
+review. Please do \emph{not} refer to these line numbers in your paper as they
133
+will be removed during generation of camera-ready copies.
134
+
135
+The file \verb+neurips_2021.tex+ may be used as a ``shell'' for writing your
136
+paper. All you have to do is replace the author, title, abstract, and text of
137
+the paper with your own.
138
+
139
+The formatting instructions contained in these style files are summarized in
140
+Sections \ref{gen_inst}, \ref{headings}, and \ref{others} below.
141
+
142
+\section{General formatting instructions}
143
+\label{gen_inst}
144
+
145
+The text must be confined within a rectangle 5.5~inches (33~picas) wide and
146
+9~inches (54~picas) long. The left margin is 1.5~inch (9~picas).  Use 10~point
147
+type with a vertical spacing (leading) of 11~points.  Times New Roman is the
148
+preferred typeface throughout, and will be selected for you by default.
149
+Paragraphs are separated by \nicefrac{1}{2}~line space (5.5 points), with no
150
+indentation.
151
+
152
+The paper title should be 17~point, initial caps/lower case, bold, centered
153
+between two horizontal rules. The top rule should be 4~points thick and the
154
+bottom rule should be 1~point thick. Allow \nicefrac{1}{4}~inch space above and
155
+below the title to rules. All pages should start at 1~inch (6~picas) from the
156
+top of the page.
157
+
158
+For the final version, authors' names are set in boldface, and each name is
159
+centered above the corresponding address. The lead author's name is to be listed
160
+first (left-most), and the co-authors' names (if different address) are set to
161
+follow. If there is only one co-author, list both author and co-author side by
162
+side.
163
+
164
+Please pay special attention to the instructions in Section \ref{others}
165
+regarding figures, tables, acknowledgments, and references.
166
+
167
+\section{Headings: first level}
168
+\label{headings}
169
+
170
+All headings should be lower case (except for first word and proper nouns),
171
+flush left, and bold.
172
+
173
+First-level headings should be in 12-point type.
174
+
175
+\subsection{Headings: second level}
176
+
177
+Second-level headings should be in 10-point type.
178
+
179
+\subsubsection{Headings: third level}
180
+
181
+Third-level headings should be in 10-point type.
182
+
183
+\paragraph{Paragraphs}
184
+
185
+There is also a \verb+\paragraph+ command available, which sets the heading in
186
+bold, flush left, and inline with the text, with the heading followed by 1\,em
187
+of space.
188
+
189
+\section{Citations, figures, tables, references}
190
+\label{others}
191
+
192
+These instructions apply to everyone.
193
+
194
+\subsection{Citations within the text}
195
+
196
+The \verb+natbib+ package will be loaded for you by default.  Citations may be
197
+author/year or numeric, as long as you maintain internal consistency.  As to the
198
+format of the references themselves, any style is acceptable as long as it is
199
+used consistently.
200
+
201
+The documentation for \verb+natbib+ may be found at
202
+\begin{center}
203
+  \url{http://mirrors.ctan.org/macros/latex/contrib/natbib/natnotes.pdf}
204
+\end{center}
205
+Of note is the command \verb+\citet+, which produces citations appropriate for
206
+use in inline text.  For example,
207
+\begin{verbatim}
208
+   \citet{hasselmo} investigated\dots
209
+\end{verbatim}
210
+produces
211
+\begin{quote}
212
+  Hasselmo, et al.\ (1995) investigated\dots
213
+\end{quote}
214
+
215
+If you wish to load the \verb+natbib+ package with options, you may add the
216
+following before loading the \verb+neurips_2021+ package:
217
+\begin{verbatim}
218
+   \PassOptionsToPackage{options}{natbib}
219
+\end{verbatim}
220
+
221
+If \verb+natbib+ clashes with another package you load, you can add the optional
222
+argument \verb+nonatbib+ when loading the style file:
223
+\begin{verbatim}
224
+   \usepackage[nonatbib]{neurips_2021}
225
+\end{verbatim}
226
+
227
+As submission is double blind, refer to your own published work in the third
228
+person. That is, use ``In the previous work of Jones et al.\ [4],'' not ``In our
229
+previous work [4].'' If you cite your other papers that are not widely available
230
+(e.g., a journal paper under review), use anonymous author names in the
231
+citation, e.g., an author of the form ``A.\ Anonymous.''
232
+
233
+\subsection{Footnotes}
234
+
235
+Footnotes should be used sparingly.  If you do require a footnote, indicate
236
+footnotes with a number\footnote{Sample of the first footnote.} in the
237
+text. Place the footnotes at the bottom of the page on which they appear.
238
+Precede the footnote with a horizontal rule of 2~inches (12~picas).
239
+
240
+Note that footnotes are properly typeset \emph{after} punctuation
241
+marks.\footnote{As in this example.}
242
+
243
+\subsection{Figures}
244
+
245
+\begin{figure}
246
+  \centering
247
+  \fbox{\rule[-.5cm]{0cm}{4cm} \rule[-.5cm]{4cm}{0cm}}
248
+  \caption{Sample figure caption.}
249
+\end{figure}
250
+
251
+All artwork must be neat, clean, and legible. Lines should be dark enough for
252
+purposes of reproduction. The figure number and caption always appear after the
253
+figure. Place one line space before the figure caption and one line space after
254
+the figure. The figure caption should be lower case (except for first word and
255
+proper nouns); figures are numbered consecutively.
256
+
257
+You may use color figures.  However, it is best for the figure captions and the
258
+paper body to be legible if the paper is printed in either black/white or in
259
+color.
260
+
261
+\subsection{Tables}
262
+
263
+All tables must be centered, neat, clean and legible.  The table number and
264
+title always appear before the table.  See Table~\ref{sample-table}.
265
+
266
+Place one line space before the table title, one line space after the
267
+table title, and one line space after the table. The table title must
268
+be lower case (except for first word and proper nouns); tables are
269
+numbered consecutively.
270
+
271
+Note that publication-quality tables \emph{do not contain vertical rules.} We
272
+strongly suggest the use of the \verb+booktabs+ package, which allows for
273
+typesetting high-quality, professional tables:
274
+\begin{center}
275
+  \url{https://www.ctan.org/pkg/booktabs}
276
+\end{center}
277
+This package was used to typeset Table~\ref{sample-table}.
278
+
279
+\begin{table}
280
+  \caption{Sample table title}
281
+  \label{sample-table}
282
+  \centering
283
+  \begin{tabular}{lll}
284
+    \toprule
285
+    \multicolumn{2}{c}{Part}                   \\
286
+    \cmidrule(r){1-2}
287
+    Name     & Description     & Size ($\mu$m) \\
288
+    \midrule
289
+    Dendrite & Input terminal  & $\sim$100     \\
290
+    Axon     & Output terminal & $\sim$10      \\
291
+    Soma     & Cell body       & up to $10^6$  \\
292
+    \bottomrule
293
+  \end{tabular}
294
+\end{table}
295
+
296
+\section{Final instructions}
297
+
298
+Do not change any aspects of the formatting parameters in the style files.  In
299
+particular, do not modify the width or length of the rectangle the text should
300
+fit into, and do not change font sizes (except perhaps in the
301
+\textbf{References} section; see below). Please note that pages should be
302
+numbered.
303
+
304
+\section{Preparing PDF files}
305
+
306
+Please prepare submission files with paper size ``US Letter,'' and not, for
307
+example, ``A4.''
308
+
309
+Fonts were the main cause of problems in the past years. Your PDF file must only
310
+contain Type 1 or Embedded TrueType fonts. Here are a few instructions to
311
+achieve this.
312
+
313
+\begin{itemize}
314
+
315
+\item You should directly generate PDF files using \verb+pdflatex+.
316
+
317
+\item You can check which fonts a PDF file uses.  In Acrobat Reader, select the
318
+  menu Files$>$Document Properties$>$Fonts and select Show All Fonts. You can
319
+  also use the program \verb+pdffonts+ which comes with \verb+xpdf+ and is
320
+  available out-of-the-box on most Linux machines.
321
+
322
+\item The IEEE has recommendations for generating PDF files whose fonts are also
323
+  acceptable for NeurIPS. Please see
324
+  \url{http://www.emfield.org/icuwb2010/downloads/IEEE-PDF-SpecV32.pdf}
325
+
326
+\item \verb+xfig+ "patterned" shapes are implemented with bitmap fonts.  Use
327
+  "solid" shapes instead.
328
+
329
+\item The \verb+\bbold+ package almost always uses bitmap fonts.  You should use
330
+  the equivalent AMS Fonts:
331
+\begin{verbatim}
332
+   \usepackage{amsfonts}
333
+\end{verbatim}
334
+followed by, e.g., \verb+\mathbb{R}+, \verb+\mathbb{N}+, or \verb+\mathbb{C}+
335
+for $\mathbb{R}$, $\mathbb{N}$ or $\mathbb{C}$.  You can also use the following
336
+workaround for reals, natural and complex:
337
+\begin{verbatim}
338
+   \newcommand{\RR}{I\!\!R} %real numbers
339
+   \newcommand{\Nat}{I\!\!N} %natural numbers
340
+   \newcommand{\CC}{I\!\!\!\!C} %complex numbers
341
+\end{verbatim}
342
+Note that \verb+amsfonts+ is automatically loaded by the \verb+amssymb+ package.
343
+
344
+\end{itemize}
345
+
346
+If your file contains Type 3 fonts or non-embedded TrueType fonts, we will ask
347
+you to fix it.
348
+
349
+\subsection{Margins in \LaTeX{}}
350
+
351
+Most of the margin problems come from figures positioned by hand using
352
+\verb+\special+ or other commands. We suggest using the command
353
+\verb+\includegraphics+ from the \verb+graphicx+ package. Always specify the
354
+figure width as a multiple of the line width as in the example below:
355
+\begin{verbatim}
356
+   \usepackage[pdftex]{graphicx} ...
357
+   \includegraphics[width=0.8\linewidth]{myfile.pdf}
358
+\end{verbatim}
359
+See Section 4.4 in the graphics bundle documentation
360
+(\url{http://mirrors.ctan.org/macros/latex/required/graphics/grfguide.pdf}).
361
+
362
+A number of width problems arise when \LaTeX{} cannot properly hyphenate a
363
+line. Please give LaTeX hyphenation hints using the \verb+\-+ command when
364
+necessary.
365
+
366
+\begin{ack}
367
+Use unnumbered first level headings for the acknowledgments. All acknowledgments
368
+go at the end of the paper before the list of references. Moreover, you are required to declare
369
+funding (financial activities supporting the submitted work) and competing interests (related financial activities outside the submitted work).
370
+More information about this disclosure can be found at: \url{https://neurips.cc/Conferences/2021/PaperInformation/FundingDisclosure}.
371
+
372
+Do {\bf not} include this section in the anonymized submission, only in the final paper. You can use the \texttt{ack} environment provided in the style file to automatically hide this section in the anonymized submission.
373
+\end{ack}
374
+
375
+\section*{References}
376
+
377
+References follow the acknowledgments. Use unnumbered first-level heading for
378
+the references. Any choice of citation style is acceptable as long as you are
379
+consistent. It is permissible to reduce the font size to \verb+small+ (9 point)
380
+when listing the references.
381
+Note that the Reference section does not count towards the page limit.
382
+\medskip
383
+
384
+{
385
+\small
386
+
387
+[1] Alexander, J.A.\ \& Mozer, M.C.\ (1995) Template-based algorithms for
388
+connectionist rule extraction. In G.\ Tesauro, D.S.\ Touretzky and T.K.\ Leen
389
+(eds.), {\it Advances in Neural Information Processing Systems 7},
390
+pp.\ 609--616. Cambridge, MA: MIT Press.
391
+
392
+[2] Bower, J.M.\ \& Beeman, D.\ (1995) {\it The Book of GENESIS: Exploring
393
+  Realistic Neural Models with the GEneral NEural SImulation System.}  New York:
394
+TELOS/Springer--Verlag.
395
+
396
+[3] Hasselmo, M.E., Schnell, E.\ \& Barkai, E.\ (1995) Dynamics of learning and
397
+recall at excitatory recurrent synapses and cholinergic modulation in rat
398
+hippocampal region CA3. {\it Journal of Neuroscience} {\bf 15}(7):5249-5262.
399
+}
400
+
401
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
402
+\section*{Checklist}
403
+
404
+%%% BEGIN INSTRUCTIONS %%%
405
+The checklist follows the references.  Please
406
+read the checklist guidelines carefully for information on how to answer these
407
+questions.  For each question, change the default \answerTODO{} to \answerYes{},
408
+\answerNo{}, or \answerNA{}.  You are strongly encouraged to include a {\bf
409
+justification to your answer}, either by referencing the appropriate section of
410
+your paper or providing a brief inline description.  For example:
411
+\begin{itemize}
412
+  \item Did you include the license to the code and datasets? \answerYes{See Section~\ref{gen_inst}.}
413
+  \item Did you include the license to the code and datasets? \answerNo{The code and the data are proprietary.}
414
+  \item Did you include the license to the code and datasets? \answerNA{}
415
+\end{itemize}
416
+Please do not modify the questions and only use the provided macros for your
417
+answers.  Note that the Checklist section does not count towards the page
418
+limit.  In your paper, please delete this instructions block and only keep the
419
+Checklist section heading above along with the questions/answers below.
420
+%%% END INSTRUCTIONS %%%
421
+
422
+\begin{enumerate}
423
+
424
+\item For all authors...
425
+\begin{enumerate}
426
+  \item Do the main claims made in the abstract and introduction accurately reflect the paper's contributions and scope?
427
+    \answerTODO{}
428
+  \item Did you describe the limitations of your work?
429
+    \answerTODO{}
430
+  \item Did you discuss any potential negative societal impacts of your work?
431
+    \answerTODO{}
432
+  \item Have you read the ethics review guidelines and ensured that your paper conforms to them?
433
+    \answerTODO{}
434
+\end{enumerate}
435
+
436
+\item If you are including theoretical results...
437
+\begin{enumerate}
438
+  \item Did you state the full set of assumptions of all theoretical results?
439
+    \answerTODO{}
440
+	\item Did you include complete proofs of all theoretical results?
441
+    \answerTODO{}
442
+\end{enumerate}
443
+
444
+\item If you ran experiments...
445
+\begin{enumerate}
446
+  \item Did you include the code, data, and instructions needed to reproduce the main experimental results (either in the supplemental material or as a URL)?
447
+    \answerTODO{}
448
+  \item Did you specify all the training details (e.g., data splits, hyperparameters, how they were chosen)?
449
+    \answerTODO{}
450
+	\item Did you report error bars (e.g., with respect to the random seed after running experiments multiple times)?
451
+    \answerTODO{}
452
+	\item Did you include the total amount of compute and the type of resources used (e.g., type of GPUs, internal cluster, or cloud provider)?
453
+    \answerTODO{}
454
+\end{enumerate}
455
+
456
+\item If you are using existing assets (e.g., code, data, models) or curating/releasing new assets...
457
+\begin{enumerate}
458
+  \item If your work uses existing assets, did you cite the creators?
459
+    \answerTODO{}
460
+  \item Did you mention the license of the assets?
461
+    \answerTODO{}
462
+  \item Did you include any new assets either in the supplemental material or as a URL?
463
+    \answerTODO{}
464
+  \item Did you discuss whether and how consent was obtained from people whose data you're using/curating?
465
+    \answerTODO{}
466
+  \item Did you discuss whether the data you are using/curating contains personally identifiable information or offensive content?
467
+    \answerTODO{}
468
+\end{enumerate}
469
+
470
+\item If you used crowdsourcing or conducted research with human subjects...
471
+\begin{enumerate}
472
+  \item Did you include the full text of instructions given to participants and screenshots, if applicable?
473
+    \answerTODO{}
474
+  \item Did you describe any potential participant risks, with links to Institutional Review Board (IRB) approvals, if applicable?
475
+    \answerTODO{}
476
+  \item Did you include the estimated hourly wage paid to participants and the total amount spent on participant compensation?
477
+    \answerTODO{}
478
+\end{enumerate}
479
+
480
+\end{enumerate}
481
+
482
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
483
+
484
+\appendix
485
+
486
+\section{Appendix}
487
+
488
+Optionally include extra information (complete proofs, additional experiments and plots) in the appendix.
489
+This section will often be part of the supplemental material.
490
+
491
+\end{document}

+ 58
- 0
doc/projectregistration2022/projectregistration.tex 查看文件

@@ -0,0 +1,58 @@
1
+\documentclass{article}
2
+
3
+% if you need to pass options to natbib, use, e.g.:
4
+%     \PassOptionsToPackage{numbers, compress}{natbib}
5
+% before loading neurips_2021
6
+
7
+% ready for submission
8
+\usepackage[preprint]{neurips_2021}
9
+
10
+% to compile a preprint version, e.g., for submission to arXiv, add the
11
+% [preprint] option:
12
+%     \usepackage[preprint]{neurips_2021}
13
+
14
+% to compile a camera-ready version, add the [final] option, e.g.:
15
+%     \usepackage[final]{neurips_2021}
16
+
17
+% to avoid loading the natbib package, add option nonatbib:
18
+%    \usepackage[nonatbib]{neurips_2021}
19
+
20
+\usepackage[utf8]{inputenc} % allow utf-8 input
21
+\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
22
+\usepackage[colorlinks=true]{hyperref}       % hyperlinks
23
+\usepackage{url}            % simple URL typesetting
24
+\usepackage{booktabs}       % professional-quality tables
25
+\usepackage{amsfonts}       % blackboard math symbols
26
+\usepackage{nicefrac}       % compact symbols for 1/2, etc.
27
+\usepackage{microtype}      % microtypography
28
+\usepackage{xcolor}         % colors
29
+
30
+\title{Analyzing Gender Share\\in Casting Actors}
31
+
32
+% The \author macro works with any number of authors. There are two commands
33
+% used to separate the names and addresses of multiple authors: \And and \AND.
34
+%
35
+% Using \And between authors leaves it to LaTeX to determine where to break the
36
+% lines. Using \AND forces a line break at that point. So, if LaTeX puts 3 of 4
37
+% authors names on the first line, and the last on the second line, try using
38
+% \AND instead of \And before the third author name.
39
+
40
+\author{%
41
+  Sophia Herrmann\\
42
+  Matrikelnummer 5688690\\
43
+  \texttt{so.herrmann@student.uni-tuebingen.de} \\
44
+  \And
45
+  Tobias Stumpp\\
46
+  Matrikelnummer 3798377\\
47
+  \texttt{tobias.stumpp@student.uni-tuebingen.de} \\
48
+}
49
+
50
+\begin{document}
51
+
52
+\maketitle
53
+
54
+\begin{abstract}
55
+  We are planning to use datasets on \href{https://datasets.imdbws.com/name.basics.tsv.gz}{film-actors}, \href{https://datasets.imdbws.com/title.basics.tsv.gz}{film-titles}, and \href{https://datasets.imdbws.com/title.ratings.tsv.gz}{film-ratings} from the \href{https://imdb.com}{IMDb} to examine how the female share of the cast of actors has changed over the years. We want to look at when and in which genres the gender share has changed. We want to see if we can find correlations between film ratings, genres, and gender share, and, if applicable, see how well film ratings can be predicted.
56
+\end{abstract}
57
+
58
+\end{document}

+ 57
- 0
doc/projectsubmission2022/bibliography.bib 查看文件

@@ -0,0 +1,57 @@
1
+@online{gitrepo,
2
+  title   = "{Code-Repository - Gender-Share-in-Casting-Actors\_DL-WS2122\_public}",
3
+  url     = "{https://coreco.samstagskind.de/tobi/Gender-Share-in-Casting-Actors_DL-WS2122}",
4
+  urldate = "{2022-02-07}",
5
+  data    = "{2022-02-07}"
6
+}
7
+
8
+@online{bechdeltestgoogletrends,
9
+  title   = "{Bechdel test - Explore - Google Trends}",
10
+  url     = "{https://trends.google.com/trends/explore?hl=en&date=all&q=%2Fm%2F0kfxr6x}",
11
+  urldate = "{2022-02-06}",
12
+  data    = "{2022-01-30}"
13
+}
14
+
15
+@online{imdbiface,
16
+  title   = "{IMDb Datasets}",
17
+  url     = "{https://www.imdb.com/interfaces/}",
18
+  urldate = "{2022-01-30}",
19
+  data    = "{2022-01-30}"
20
+}
21
+
22
+@online{imdbws,
23
+  title   = "{IMDb data files available for download}",
24
+  url     = "{https://datasets.imdbws.com/}",
25
+  urldate = "{2022-01-30}",
26
+  data    = "{2022-01-30}"
27
+}
28
+
29
+@online{moviepilotfilmebechtelmehrkohle,
30
+  title   = "{Filme, die den Bechdel-Test bestehen, bringen mehr Kohle}",
31
+  url     = "{https://www.moviepilot.de/news/filme-die-den-bechdel-test-bestehen-bringen-mehr-kohle-128899}",
32
+  urldate = "{2022-02-06}",
33
+  data    = "{2014-04-03}"
34
+}
35
+https://www.moviepilot.de/news/filme-die-den-bechdel-test-bestehen-bringen-mehr-kohle-128899
36
+
37
+@online{fivethirtyeightexclusionwomen,
38
+  title   = "{The Dollar-And-Cents Case Against Hollywood’s Exclusion of Women | FiveThirtyEight}",
39
+  url     = "{https://fivethirtyeight.com/features/the-dollar-and-cents-case-against-hollywoods-exclusion-of-women/}",
40
+  urldate = "{2022-02-06}",
41
+  data    = "{2014-04-01}"
42
+}
43
+
44
+@online{bechdeltestwikien,
45
+  title   = "{Bechdel test - Wikipedia}",
46
+  url     = "{https://en.wikipedia.org/wiki/Bechdel_test}",
47
+  urldate = "{2022-02-06}",
48
+  data    = "{2022-01-02}"
49
+}
50
+
51
+@online{dtwofblog,
52
+  title   = "{DTWOF: The Blog: The Rule}",
53
+  url     = "{https://alisonbechdel.blogspot.com/2005/08/rule.html}",
54
+  urldate = "{2022-02-06}",
55
+  data    = "{2005-08-16}",
56
+  author  = "{Alison Bechdel}"
57
+}

二進制
doc/projectsubmission2022/fig-001_Share-in-principal-cast-of-actresses-in-all-movies-1980-2020.png 查看文件


+ 377
- 0
doc/projectsubmission2022/neurips_2021.sty 查看文件

@@ -0,0 +1,377 @@
1
+% partial rewrite of the LaTeX2e package for submissions to the
2
+% Conference on Neural Information Processing Systems (NeurIPS):
3
+%
4
+% - uses more LaTeX conventions
5
+% - line numbers at submission time replaced with aligned numbers from
6
+%   lineno package
7
+% - \nipsfinalcopy replaced with [final] package option
8
+% - automatically loads times package for authors
9
+% - loads natbib automatically; this can be suppressed with the
10
+%   [nonatbib] package option
11
+% - adds foot line to first page identifying the conference
12
+% - adds preprint option for submission to e.g. arXiv
13
+% - conference acronym modified
14
+%
15
+% Roman Garnett (garnett@wustl.edu) and the many authors of
16
+% nips15submit_e.sty, including MK and drstrip@sandia
17
+%
18
+% last revision: March 2021
19
+
20
+\NeedsTeXFormat{LaTeX2e}
21
+\ProvidesPackage{neurips_2021}[2021/03/31 NeurIPS 2021 submission/camera-ready style file]
22
+
23
+% declare final option, which creates camera-ready copy
24
+\newif\if@neuripsfinal\@neuripsfinalfalse
25
+\DeclareOption{final}{
26
+  \@neuripsfinaltrue
27
+}
28
+
29
+% declare nonatbib option, which does not load natbib in case of
30
+% package clash (users can pass options to natbib via
31
+% \PassOptionsToPackage)
32
+\newif\if@natbib\@natbibtrue
33
+\DeclareOption{nonatbib}{
34
+  \@natbibfalse
35
+}
36
+
37
+% declare preprint option, which creates a preprint version ready for
38
+% upload to, e.g., arXiv
39
+\newif\if@preprint\@preprintfalse
40
+\DeclareOption{preprint}{
41
+  \@preprinttrue
42
+}
43
+
44
+\ProcessOptions\relax
45
+
46
+% determine whether this is an anonymized submission
47
+\newif\if@submission\@submissiontrue
48
+\if@neuripsfinal\@submissionfalse\fi
49
+\if@preprint\@submissionfalse\fi
50
+
51
+% fonts
52
+\renewcommand{\rmdefault}{ptm}
53
+\renewcommand{\sfdefault}{phv}
54
+
55
+% change this every year for notice string at bottom
56
+\newcommand{\@neuripsordinal}{35th}
57
+\newcommand{\@neuripsyear}{2021}
58
+\newcommand{\@neuripslocation}{virtual}
59
+
60
+% acknowledgments
61
+\usepackage{environ}
62
+\newcommand{\acksection}{\section*{Acknowledgments and Disclosure of Funding}}
63
+\NewEnviron{ack}{%
64
+  \acksection
65
+  \BODY
66
+}
67
+
68
+% handle tweaks for camera-ready copy vs. submission copy
69
+\if@preprint
70
+  \newcommand{\@noticestring}{%
71
+    Project Report for \emph{Data Literacy} 2021/22
72
+  }
73
+\else
74
+  \if@neuripsfinal
75
+    \newcommand{\@noticestring}{%
76
+      \@neuripsordinal\/ Conference on Neural Information Processing Systems
77
+      (NeurIPS \@neuripsyear).%, \@neuripslocation.%
78
+    }
79
+  \else
80
+    \newcommand{\@noticestring}{%
81
+      Submitted to \@neuripsordinal\/ Conference on Neural Information
82
+      Processing Systems (NeurIPS \@neuripsyear). Do not distribute.%
83
+    }
84
+
85
+    % hide the acknowledgements
86
+    \NewEnviron{hide}{}
87
+    \let\ack\hide
88
+    \let\endack\endhide
89
+
90
+    % line numbers for submission
91
+    \RequirePackage{lineno}
92
+    \linenumbers
93
+
94
+    % fix incompatibilities between lineno and amsmath, if required, by
95
+    % transparently wrapping linenomath environments around amsmath
96
+    % environments
97
+    \AtBeginDocument{%
98
+      \@ifpackageloaded{amsmath}{%
99
+        \newcommand*\patchAmsMathEnvironmentForLineno[1]{%
100
+          \expandafter\let\csname old#1\expandafter\endcsname\csname #1\endcsname
101
+          \expandafter\let\csname oldend#1\expandafter\endcsname\csname end#1\endcsname
102
+          \renewenvironment{#1}%
103
+                           {\linenomath\csname old#1\endcsname}%
104
+                           {\csname oldend#1\endcsname\endlinenomath}%
105
+        }%
106
+        \newcommand*\patchBothAmsMathEnvironmentsForLineno[1]{%
107
+          \patchAmsMathEnvironmentForLineno{#1}%
108
+          \patchAmsMathEnvironmentForLineno{#1*}%
109
+        }%
110
+        \patchBothAmsMathEnvironmentsForLineno{equation}%
111
+        \patchBothAmsMathEnvironmentsForLineno{align}%
112
+        \patchBothAmsMathEnvironmentsForLineno{flalign}%
113
+        \patchBothAmsMathEnvironmentsForLineno{alignat}%
114
+        \patchBothAmsMathEnvironmentsForLineno{gather}%
115
+        \patchBothAmsMathEnvironmentsForLineno{multline}%
116
+      }{}
117
+    }
118
+  \fi
119
+\fi
120
+
121
+% load natbib unless told otherwise
122
+\if@natbib
123
+  \RequirePackage{natbib}
124
+\fi
125
+
126
+% set page geometry
127
+\usepackage[verbose=true,letterpaper]{geometry}
128
+\AtBeginDocument{
129
+  \newgeometry{
130
+    textheight=9in,
131
+    textwidth=5.5in,
132
+    top=1in,
133
+    headheight=12pt,
134
+    headsep=25pt,
135
+    footskip=30pt
136
+  }
137
+  \@ifpackageloaded{fullpage}
138
+    {\PackageWarning{neurips_2021}{fullpage package not allowed! Overwriting formatting.}}
139
+    {}
140
+}
141
+
142
+\widowpenalty=10000
143
+\clubpenalty=10000
144
+\flushbottom
145
+\sloppy
146
+
147
+% font sizes with reduced leading
148
+\renewcommand{\normalsize}{%
149
+  \@setfontsize\normalsize\@xpt\@xipt
150
+  \abovedisplayskip      7\p@ \@plus 2\p@ \@minus 5\p@
151
+  \abovedisplayshortskip \z@ \@plus 3\p@
152
+  \belowdisplayskip      \abovedisplayskip
153
+  \belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@
154
+}
155
+\normalsize
156
+\renewcommand{\small}{%
157
+  \@setfontsize\small\@ixpt\@xpt
158
+  \abovedisplayskip      6\p@ \@plus 1.5\p@ \@minus 4\p@
159
+  \abovedisplayshortskip \z@  \@plus 2\p@
160
+  \belowdisplayskip      \abovedisplayskip
161
+  \belowdisplayshortskip 3\p@ \@plus 2\p@   \@minus 2\p@
162
+}
163
+\renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt}
164
+\renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt}
165
+\renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt}
166
+\renewcommand{\large}{\@setfontsize\large\@xiipt{14}}
167
+\renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}}
168
+\renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}}
169
+\renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}}
170
+\renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}}
171
+
172
+% sections with less space
173
+\providecommand{\section}{}
174
+\renewcommand{\section}{%
175
+  \@startsection{section}{1}{\z@}%
176
+                {-2.0ex \@plus -0.5ex \@minus -0.2ex}%
177
+                { 1.5ex \@plus  0.3ex \@minus  0.2ex}%
178
+                {\large\bf\raggedright}%
179
+}
180
+\providecommand{\subsection}{}
181
+\renewcommand{\subsection}{%
182
+  \@startsection{subsection}{2}{\z@}%
183
+                {-1.8ex \@plus -0.5ex \@minus -0.2ex}%
184
+                { 0.8ex \@plus  0.2ex}%
185
+                {\normalsize\bf\raggedright}%
186
+}
187
+\providecommand{\subsubsection}{}
188
+\renewcommand{\subsubsection}{%
189
+  \@startsection{subsubsection}{3}{\z@}%
190
+                {-1.5ex \@plus -0.5ex \@minus -0.2ex}%
191
+                { 0.5ex \@plus  0.2ex}%
192
+                {\normalsize\bf\raggedright}%
193
+}
194
+\providecommand{\paragraph}{}
195
+\renewcommand{\paragraph}{%
196
+  \@startsection{paragraph}{4}{\z@}%
197
+                {1.5ex \@plus 0.5ex \@minus 0.2ex}%
198
+                {-1em}%
199
+                {\normalsize\bf}%
200
+}
201
+\providecommand{\subparagraph}{}
202
+\renewcommand{\subparagraph}{%
203
+  \@startsection{subparagraph}{5}{\z@}%
204
+                {1.5ex \@plus 0.5ex \@minus 0.2ex}%
205
+                {-1em}%
206
+                {\normalsize\bf}%
207
+}
208
+\providecommand{\subsubsubsection}{}
209
+\renewcommand{\subsubsubsection}{%
210
+  \vskip5pt{\noindent\normalsize\rm\raggedright}%
211
+}
212
+
213
+% float placement
214
+\renewcommand{\topfraction      }{0.85}
215
+\renewcommand{\bottomfraction   }{0.4}
216
+\renewcommand{\textfraction     }{0.1}
217
+\renewcommand{\floatpagefraction}{0.7}
218
+
219
+\newlength{\@neuripsabovecaptionskip}\setlength{\@neuripsabovecaptionskip}{7\p@}
220
+\newlength{\@neuripsbelowcaptionskip}\setlength{\@neuripsbelowcaptionskip}{\z@}
221
+
222
+\setlength{\abovecaptionskip}{\@neuripsabovecaptionskip}
223
+\setlength{\belowcaptionskip}{\@neuripsbelowcaptionskip}
224
+
225
+% swap above/belowcaptionskip lengths for tables
226
+\renewenvironment{table}
227
+  {\setlength{\abovecaptionskip}{\@neuripsbelowcaptionskip}%
228
+   \setlength{\belowcaptionskip}{\@neuripsabovecaptionskip}%
229
+   \@float{table}}
230
+  {\end@float}
231
+
232
+% footnote formatting
233
+\setlength{\footnotesep }{6.65\p@}
234
+\setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@}
235
+\renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@}
236
+\setcounter{footnote}{0}
237
+
238
+% paragraph formatting
239
+\setlength{\parindent}{\z@}
240
+\setlength{\parskip  }{5.5\p@}
241
+
242
+% list formatting
243
+\setlength{\topsep       }{4\p@ \@plus 1\p@   \@minus 2\p@}
244
+\setlength{\partopsep    }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@}
245
+\setlength{\itemsep      }{2\p@ \@plus 1\p@   \@minus 0.5\p@}
246
+\setlength{\parsep       }{2\p@ \@plus 1\p@   \@minus 0.5\p@}
247
+\setlength{\leftmargin   }{3pc}
248
+\setlength{\leftmargini  }{\leftmargin}
249
+\setlength{\leftmarginii }{2em}
250
+\setlength{\leftmarginiii}{1.5em}
251
+\setlength{\leftmarginiv }{1.0em}
252
+\setlength{\leftmarginv  }{0.5em}
253
+\def\@listi  {\leftmargin\leftmargini}
254
+\def\@listii {\leftmargin\leftmarginii
255
+              \labelwidth\leftmarginii
256
+              \advance\labelwidth-\labelsep
257
+              \topsep  2\p@ \@plus 1\p@    \@minus 0.5\p@
258
+              \parsep  1\p@ \@plus 0.5\p@ \@minus 0.5\p@
259
+              \itemsep \parsep}
260
+\def\@listiii{\leftmargin\leftmarginiii
261
+              \labelwidth\leftmarginiii
262
+              \advance\labelwidth-\labelsep
263
+              \topsep    1\p@ \@plus 0.5\p@ \@minus 0.5\p@
264
+              \parsep    \z@
265
+              \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@
266
+              \itemsep \topsep}
267
+\def\@listiv {\leftmargin\leftmarginiv
268
+              \labelwidth\leftmarginiv
269
+              \advance\labelwidth-\labelsep}
270
+\def\@listv  {\leftmargin\leftmarginv
271
+              \labelwidth\leftmarginv
272
+              \advance\labelwidth-\labelsep}
273
+\def\@listvi {\leftmargin\leftmarginvi
274
+              \labelwidth\leftmarginvi
275
+              \advance\labelwidth-\labelsep}
276
+
277
+% create title
278
+\providecommand{\maketitle}{}
279
+\renewcommand{\maketitle}{%
280
+  \par
281
+  \begingroup
282
+    \renewcommand{\thefootnote}{\fnsymbol{footnote}}
283
+    % for perfect author name centering
284
+    \renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}}
285
+    % The footnote-mark was overlapping the footnote-text,
286
+    % added the following to fix this problem               (MK)
287
+    \long\def\@makefntext##1{%
288
+      \parindent 1em\noindent
289
+      \hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1
290
+    }
291
+    \thispagestyle{empty}
292
+    \@maketitle
293
+    \@thanks
294
+    \@notice
295
+  \endgroup
296
+  \let\maketitle\relax
297
+  \let\thanks\relax
298
+}
299
+
300
+% rules for title box at top of first page
301
+\newcommand{\@toptitlebar}{
302
+  \hrule height 4\p@
303
+  \vskip 0.25in
304
+  \vskip -\parskip%
305
+}
306
+\newcommand{\@bottomtitlebar}{
307
+  \vskip 0.29in
308
+  \vskip -\parskip
309
+  \hrule height 1\p@
310
+  \vskip 0.09in%
311
+}
312
+
313
+% create title (includes both anonymized and non-anonymized versions)
314
+\providecommand{\@maketitle}{}
315
+\renewcommand{\@maketitle}{%
316
+  \vbox{%
317
+    \hsize\textwidth
318
+    \linewidth\hsize
319
+    \vskip 0.1in
320
+    \@toptitlebar
321
+    \centering
322
+    {\LARGE\bf \@title\par}
323
+    \@bottomtitlebar
324
+    \if@submission
325
+      \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}
326
+        Anonymous Author(s) \\
327
+        Affiliation \\
328
+        Address \\
329
+        \texttt{email} \\
330
+      \end{tabular}%
331
+    \else
332
+      \def\And{%
333
+        \end{tabular}\hfil\linebreak[0]\hfil%
334
+        \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
335
+      }
336
+      \def\AND{%
337
+        \end{tabular}\hfil\linebreak[4]\hfil%
338
+        \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces%
339
+      }
340
+      \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}%
341
+    \fi
342
+    \vskip 0.3in \@minus 0.1in
343
+  }
344
+}
345
+
346
+% add conference notice to bottom of first page
347
+\newcommand{\ftype@noticebox}{8}
348
+\newcommand{\@notice}{%
349
+  % give a bit of extra room back to authors on first page
350
+  \enlargethispage{2\baselineskip}%
351
+  \@float{noticebox}[b]%
352
+    \footnotesize\@noticestring%
353
+  \end@float%
354
+}
355
+
356
+% abstract styling
357
+\renewenvironment{abstract}%
358
+{%
359
+  \vskip 0.075in%
360
+  \centerline%
361
+  {\large\bf Abstract}%
362
+  \vspace{0.5ex}%
363
+  \begin{quote}%
364
+}
365
+{
366
+  \par%
367
+  \end{quote}%
368
+  \vskip 1ex%
369
+}
370
+
371
+% For the paper checklist
372
+\newcommand{\answerYes}[1][]{\textcolor{blue}{[Yes] #1}}
373
+\newcommand{\answerNo}[1][]{\textcolor{orange}{[No] #1}}
374
+\newcommand{\answerNA}[1][]{\textcolor{gray}{[N/A] #1}}
375
+\newcommand{\answerTODO}[1][]{\textcolor{red}{\bf [TODO]}}
376
+
377
+\endinput

+ 491
- 0
doc/projectsubmission2022/neurips_2021.tex 查看文件

@@ -0,0 +1,491 @@
1
+\documentclass{article}
2
+
3
+% if you need to pass options to natbib, use, e.g.:
4
+%     \PassOptionsToPackage{numbers, compress}{natbib}
5
+% before loading neurips_2021
6
+
7
+% ready for submission
8
+\usepackage[preprint]{neurips_2021}
9
+
10
+% to compile a preprint version, e.g., for submission to arXiv, add the
11
+% [preprint] option:
12
+%     \usepackage[preprint]{neurips_2021}
13
+
14
+% to compile a camera-ready version, add the [final] option, e.g.:
15
+%     \usepackage[final]{neurips_2021}
16
+
17
+% to avoid loading the natbib package, add option nonatbib:
18
+%    \usepackage[nonatbib]{neurips_2021}
19
+
20
+\usepackage[utf8]{inputenc} % allow utf-8 input
21
+\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
22
+\usepackage{hyperref}       % hyperlinks
23
+\usepackage{url}            % simple URL typesetting
24
+\usepackage{booktabs}       % professional-quality tables
25
+\usepackage{amsfonts}       % blackboard math symbols
26
+\usepackage{nicefrac}       % compact symbols for 1/2, etc.
27
+\usepackage{microtype}      % microtypography
28
+\usepackage{xcolor}         % colors
29
+
30
+\title{Formatting Instructions For NeurIPS 2021}
31
+
32
+% The \author macro works with any number of authors. There are two commands
33
+% used to separate the names and addresses of multiple authors: \And and \AND.
34
+%
35
+% Using \And between authors leaves it to LaTeX to determine where to break the
36
+% lines. Using \AND forces a line break at that point. So, if LaTeX puts 3 of 4
37
+% authors names on the first line, and the last on the second line, try using
38
+% \AND instead of \And before the third author name.
39
+
40
+\author{%
41
+  David S.~Hippocampus\thanks{Use footnote for providing further information
42
+    about author (webpage, alternative address)---\emph{not} for acknowledging
43
+    funding agencies.} \\
44
+  Department of Computer Science\\
45
+  Cranberry-Lemon University\\
46
+  Pittsburgh, PA 15213 \\
47
+  \texttt{hippo@cs.cranberry-lemon.edu} \\
48
+  % examples of more authors
49
+  % \And
50
+  % Coauthor \\
51
+  % Affiliation \\
52
+  % Address \\
53
+  % \texttt{email} \\
54
+  % \AND
55
+  % Coauthor \\
56
+  % Affiliation \\
57
+  % Address \\
58
+  % \texttt{email} \\
59
+  % \And
60
+  % Coauthor \\
61
+  % Affiliation \\
62
+  % Address \\
63
+  % \texttt{email} \\
64
+  % \And
65
+  % Coauthor \\
66
+  % Affiliation \\
67
+  % Address \\
68
+  % \texttt{email} \\
69
+}
70
+
71
+\begin{document}
72
+
73
+\maketitle
74
+
75
+\begin{abstract}
76
+  The abstract paragraph should be indented \nicefrac{1}{2}~inch (3~picas) on
77
+  both the left- and right-hand margins. Use 10~point type, with a vertical
78
+  spacing (leading) of 11~points.  The word \textbf{Abstract} must be centered,
79
+  bold, and in point size 12. Two line spaces precede the abstract. The abstract
80
+  must be limited to one paragraph.
81
+\end{abstract}
82
+
83
+\section{Submission of papers to NeurIPS 2021}
84
+
85
+Please read the instructions below carefully and follow them faithfully.
86
+
87
+\subsection{Style}
88
+
89
+Papers to be submitted to NeurIPS 2021 must be prepared according to the
90
+instructions presented here. Papers may only be up to {\bf nine} pages long,
91
+including figures. Additional pages \emph{containing only acknowledgments and
92
+references} are allowed. Papers that exceed the page limit will not be
93
+reviewed, or in any other way considered for presentation at the conference.
94
+
95
+The margins in 2021 are the same as those in 2007, which allow for $\sim$$15\%$
96
+more words in the paper compared to earlier years.
97
+
98
+Authors are required to use the NeurIPS \LaTeX{} style files obtainable at the
99
+NeurIPS website as indicated below. Please make sure you use the current files
100
+and not previous versions. Tweaking the style files may be grounds for
101
+rejection.
102
+
103
+\subsection{Retrieval of style files}
104
+
105
+The style files for NeurIPS and other conference information are available on
106
+the World Wide Web at
107
+\begin{center}
108
+  \url{http://www.neurips.cc/}
109
+\end{center}
110
+The file \verb+neurips_2021.pdf+ contains these instructions and illustrates the
111
+various formatting requirements your NeurIPS paper must satisfy.
112
+
113
+The only supported style file for NeurIPS 2021 is \verb+neurips_2021.sty+,
114
+rewritten for \LaTeXe{}.  \textbf{Previous style files for \LaTeX{} 2.09,
115
+  Microsoft Word, and RTF are no longer supported!}
116
+
117
+The \LaTeX{} style file contains three optional arguments: \verb+final+, which
118
+creates a camera-ready copy, \verb+preprint+, which creates a preprint for
119
+submission to, e.g., arXiv, and \verb+nonatbib+, which will not load the
120
+\verb+natbib+ package for you in case of package clash.
121
+
122
+\paragraph{Preprint option}
123
+If you wish to post a preprint of your work online, e.g., on arXiv, using the
124
+NeurIPS style, please use the \verb+preprint+ option. This will create a
125
+nonanonymized version of your work with the text ``Preprint. Work in progress.''
126
+in the footer. This version may be distributed as you see fit. Please \textbf{do
127
+  not} use the \verb+final+ option, which should \textbf{only} be used for
128
+papers accepted to NeurIPS.
129
+
130
+At submission time, please omit the \verb+final+ and \verb+preprint+
131
+options. This will anonymize your submission and add line numbers to aid
132
+review. Please do \emph{not} refer to these line numbers in your paper as they
133
+will be removed during generation of camera-ready copies.
134
+
135
+The file \verb+neurips_2021.tex+ may be used as a ``shell'' for writing your
136
+paper. All you have to do is replace the author, title, abstract, and text of
137
+the paper with your own.
138
+
139
+The formatting instructions contained in these style files are summarized in
140
+Sections \ref{gen_inst}, \ref{headings}, and \ref{others} below.
141
+
142
+\section{General formatting instructions}
143
+\label{gen_inst}
144
+
145
+The text must be confined within a rectangle 5.5~inches (33~picas) wide and
146
+9~inches (54~picas) long. The left margin is 1.5~inch (9~picas).  Use 10~point
147
+type with a vertical spacing (leading) of 11~points.  Times New Roman is the
148
+preferred typeface throughout, and will be selected for you by default.
149
+Paragraphs are separated by \nicefrac{1}{2}~line space (5.5 points), with no
150
+indentation.
151
+
152
+The paper title should be 17~point, initial caps/lower case, bold, centered
153
+between two horizontal rules. The top rule should be 4~points thick and the
154
+bottom rule should be 1~point thick. Allow \nicefrac{1}{4}~inch space above and
155
+below the title to rules. All pages should start at 1~inch (6~picas) from the
156
+top of the page.
157
+
158
+For the final version, authors' names are set in boldface, and each name is
159
+centered above the corresponding address. The lead author's name is to be listed
160
+first (left-most), and the co-authors' names (if different address) are set to
161
+follow. If there is only one co-author, list both author and co-author side by
162
+side.
163
+
164
+Please pay special attention to the instructions in Section \ref{others}
165
+regarding figures, tables, acknowledgments, and references.
166
+
167
+\section{Headings: first level}
168
+\label{headings}
169
+
170
+All headings should be lower case (except for first word and proper nouns),
171
+flush left, and bold.
172
+
173
+First-level headings should be in 12-point type.
174
+
175
+\subsection{Headings: second level}
176
+
177
+Second-level headings should be in 10-point type.
178
+
179
+\subsubsection{Headings: third level}
180
+
181
+Third-level headings should be in 10-point type.
182
+
183
+\paragraph{Paragraphs}
184
+
185
+There is also a \verb+\paragraph+ command available, which sets the heading in
186
+bold, flush left, and inline with the text, with the heading followed by 1\,em
187
+of space.
188
+
189
+\section{Citations, figures, tables, references}
190
+\label{others}
191
+
192
+These instructions apply to everyone.
193
+
194
+\subsection{Citations within the text}
195
+
196
+The \verb+natbib+ package will be loaded for you by default.  Citations may be
197
+author/year or numeric, as long as you maintain internal consistency.  As to the
198
+format of the references themselves, any style is acceptable as long as it is
199
+used consistently.
200
+
201
+The documentation for \verb+natbib+ may be found at
202
+\begin{center}
203
+  \url{http://mirrors.ctan.org/macros/latex/contrib/natbib/natnotes.pdf}
204
+\end{center}
205
+Of note is the command \verb+\citet+, which produces citations appropriate for
206
+use in inline text.  For example,
207
+\begin{verbatim}
208
+   \citet{hasselmo} investigated\dots
209
+\end{verbatim}
210
+produces
211
+\begin{quote}
212
+  Hasselmo, et al.\ (1995) investigated\dots
213
+\end{quote}
214
+
215
+If you wish to load the \verb+natbib+ package with options, you may add the
216
+following before loading the \verb+neurips_2021+ package:
217
+\begin{verbatim}
218
+   \PassOptionsToPackage{options}{natbib}
219
+\end{verbatim}
220
+
221
+If \verb+natbib+ clashes with another package you load, you can add the optional
222
+argument \verb+nonatbib+ when loading the style file:
223
+\begin{verbatim}
224
+   \usepackage[nonatbib]{neurips_2021}
225
+\end{verbatim}
226
+
227
+As submission is double blind, refer to your own published work in the third
228
+person. That is, use ``In the previous work of Jones et al.\ [4],'' not ``In our
229
+previous work [4].'' If you cite your other papers that are not widely available
230
+(e.g., a journal paper under review), use anonymous author names in the
231
+citation, e.g., an author of the form ``A.\ Anonymous.''
232
+
233
+\subsection{Footnotes}
234
+
235
+Footnotes should be used sparingly.  If you do require a footnote, indicate
236
+footnotes with a number\footnote{Sample of the first footnote.} in the
237
+text. Place the footnotes at the bottom of the page on which they appear.
238
+Precede the footnote with a horizontal rule of 2~inches (12~picas).
239
+
240
+Note that footnotes are properly typeset \emph{after} punctuation
241
+marks.\footnote{As in this example.}
242
+
243
+\subsection{Figures}
244
+
245
+\begin{figure}
246
+  \centering
247
+  \fbox{\rule[-.5cm]{0cm}{4cm} \rule[-.5cm]{4cm}{0cm}}
248
+  \caption{Sample figure caption.}
249
+\end{figure}
250
+
251
+All artwork must be neat, clean, and legible. Lines should be dark enough for
252
+purposes of reproduction. The figure number and caption always appear after the
253
+figure. Place one line space before the figure caption and one line space after
254
+the figure. The figure caption should be lower case (except for first word and
255
+proper nouns); figures are numbered consecutively.
256
+
257
+You may use color figures.  However, it is best for the figure captions and the
258
+paper body to be legible if the paper is printed in either black/white or in
259
+color.
260
+
261
+\subsection{Tables}
262
+
263
+All tables must be centered, neat, clean and legible.  The table number and
264
+title always appear before the table.  See Table~\ref{sample-table}.
265
+
266
+Place one line space before the table title, one line space after the
267
+table title, and one line space after the table. The table title must
268
+be lower case (except for first word and proper nouns); tables are
269
+numbered consecutively.
270
+
271
+Note that publication-quality tables \emph{do not contain vertical rules.} We
272
+strongly suggest the use of the \verb+booktabs+ package, which allows for
273
+typesetting high-quality, professional tables:
274
+\begin{center}
275
+  \url{https://www.ctan.org/pkg/booktabs}
276
+\end{center}
277
+This package was used to typeset Table~\ref{sample-table}.
278
+
279
+\begin{table}
280
+  \caption{Sample table title}
281
+  \label{sample-table}
282
+  \centering
283
+  \begin{tabular}{lll}
284
+    \toprule
285
+    \multicolumn{2}{c}{Part}                   \\
286
+    \cmidrule(r){1-2}
287
+    Name     & Description     & Size ($\mu$m) \\
288
+    \midrule
289
+    Dendrite & Input terminal  & $\sim$100     \\
290
+    Axon     & Output terminal & $\sim$10      \\
291
+    Soma     & Cell body       & up to $10^6$  \\
292
+    \bottomrule
293
+  \end{tabular}
294
+\end{table}
295
+
296
+\section{Final instructions}
297
+
298
+Do not change any aspects of the formatting parameters in the style files.  In
299
+particular, do not modify the width or length of the rectangle the text should
300
+fit into, and do not change font sizes (except perhaps in the
301
+\textbf{References} section; see below). Please note that pages should be
302
+numbered.
303
+
304
+\section{Preparing PDF files}
305
+
306
+Please prepare submission files with paper size ``US Letter,'' and not, for
307
+example, ``A4.''
308
+
309
+Fonts were the main cause of problems in the past years. Your PDF file must only
310
+contain Type 1 or Embedded TrueType fonts. Here are a few instructions to
311
+achieve this.
312
+
313
+\begin{itemize}
314
+
315
+\item You should directly generate PDF files using \verb+pdflatex+.
316
+
317
+\item You can check which fonts a PDF file uses.  In Acrobat Reader, select the
318
+  menu Files$>$Document Properties$>$Fonts and select Show All Fonts. You can
319
+  also use the program \verb+pdffonts+ which comes with \verb+xpdf+ and is
320
+  available out-of-the-box on most Linux machines.
321
+
322
+\item The IEEE has recommendations for generating PDF files whose fonts are also
323
+  acceptable for NeurIPS. Please see
324
+  \url{http://www.emfield.org/icuwb2010/downloads/IEEE-PDF-SpecV32.pdf}
325
+
326
+\item \verb+xfig+ "patterned" shapes are implemented with bitmap fonts.  Use
327
+  "solid" shapes instead.
328
+
329
+\item The \verb+\bbold+ package almost always uses bitmap fonts.  You should use
330
+  the equivalent AMS Fonts:
331
+\begin{verbatim}
332
+   \usepackage{amsfonts}
333
+\end{verbatim}
334
+followed by, e.g., \verb+\mathbb{R}+, \verb+\mathbb{N}+, or \verb+\mathbb{C}+
335
+for $\mathbb{R}$, $\mathbb{N}$ or $\mathbb{C}$.  You can also use the following
336
+workaround for reals, natural and complex:
337
+\begin{verbatim}
338
+   \newcommand{\RR}{I\!\!R} %real numbers
339
+   \newcommand{\Nat}{I\!\!N} %natural numbers
340
+   \newcommand{\CC}{I\!\!\!\!C} %complex numbers
341
+\end{verbatim}
342
+Note that \verb+amsfonts+ is automatically loaded by the \verb+amssymb+ package.
343
+
344
+\end{itemize}
345
+
346
+If your file contains type 3 fonts or non embedded TrueType fonts, we will ask
347
+you to fix it.
348
+
349
+\subsection{Margins in \LaTeX{}}
350
+
351
+Most of the margin problems come from figures positioned by hand using
352
+\verb+\special+ or other commands. We suggest using the command
353
+\verb+\includegraphics+ from the \verb+graphicx+ package. Always specify the
354
+figure width as a multiple of the line width as in the example below:
355
+\begin{verbatim}
356
+   \usepackage[pdftex]{graphicx} ...
357
+   \includegraphics[width=0.8\linewidth]{myfile.pdf}
358
+\end{verbatim}
359
+See Section 4.4 in the graphics bundle documentation
360
+(\url{http://mirrors.ctan.org/macros/latex/required/graphics/grfguide.pdf})
361
+
362
+A number of width problems arise when \LaTeX{} cannot properly hyphenate a
363
+line. Please give \LaTeX{} hyphenation hints using the \verb+\-+ command when
364
+necessary.
365
+
366
+\begin{ack}
367
+Use unnumbered first level headings for the acknowledgments. All acknowledgments
368
+go at the end of the paper before the list of references. Moreover, you are required to declare
369
+funding (financial activities supporting the submitted work) and competing interests (related financial activities outside the submitted work).
370
+More information about this disclosure can be found at: \url{https://neurips.cc/Conferences/2021/PaperInformation/FundingDisclosure}.
371
+
372
+Do {\bf not} include this section in the anonymized submission, only in the final paper. You can use the \texttt{ack} environment provided in the style file to automatically hide this section in the anonymized submission.
373
+\end{ack}
374
+
375
+\section*{References}
376
+
377
+References follow the acknowledgments. Use unnumbered first-level heading for
378
+the references. Any choice of citation style is acceptable as long as you are
379
+consistent. It is permissible to reduce the font size to \verb+small+ (9 point)
380
+when listing the references.
381
+Note that the Reference section does not count towards the page limit.
382
+\medskip
383
+
384
+{
385
+\small
386
+
387
+[1] Alexander, J.A.\ \& Mozer, M.C.\ (1995) Template-based algorithms for
388
+connectionist rule extraction. In G.\ Tesauro, D.S.\ Touretzky and T.K.\ Leen
389
+(eds.), {\it Advances in Neural Information Processing Systems 7},
390
+pp.\ 609--616. Cambridge, MA: MIT Press.
391
+
392
+[2] Bower, J.M.\ \& Beeman, D.\ (1995) {\it The Book of GENESIS: Exploring
393
+  Realistic Neural Models with the GEneral NEural SImulation System.}  New York:
394
+TELOS/Springer--Verlag.
395
+
396
+[3] Hasselmo, M.E., Schnell, E.\ \& Barkai, E.\ (1995) Dynamics of learning and
397
+recall at excitatory recurrent synapses and cholinergic modulation in rat
398
+hippocampal region CA3. {\it Journal of Neuroscience} {\bf 15}(7):5249-5262.
399
+}
400
+
401
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
402
+\section*{Checklist}
403
+
404
+%%% BEGIN INSTRUCTIONS %%%
405
+The checklist follows the references.  Please
406
+read the checklist guidelines carefully for information on how to answer these
407
+questions.  For each question, change the default \answerTODO{} to \answerYes{},
408
+\answerNo{}, or \answerNA{}.  You are strongly encouraged to include a {\bf
409
+justification to your answer}, either by referencing the appropriate section of
410
+your paper or providing a brief inline description.  For example:
411
+\begin{itemize}
412
+  \item Did you include the license to the code and datasets? \answerYes{See Section~\ref{gen_inst}.}
413
+  \item Did you include the license to the code and datasets? \answerNo{The code and the data are proprietary.}
414
+  \item Did you include the license to the code and datasets? \answerNA{}
415
+\end{itemize}
416
+Please do not modify the questions and only use the provided macros for your
417
+answers.  Note that the Checklist section does not count towards the page
418
+limit.  In your paper, please delete this instructions block and only keep the
419
+Checklist section heading above along with the questions/answers below.
420
+%%% END INSTRUCTIONS %%%
421
+
422
+\begin{enumerate}
423
+
424
+\item For all authors...
425
+\begin{enumerate}
426
+  \item Do the main claims made in the abstract and introduction accurately reflect the paper's contributions and scope?
427
+    \answerTODO{}
428
+  \item Did you describe the limitations of your work?
429
+    \answerTODO{}
430
+  \item Did you discuss any potential negative societal impacts of your work?
431
+    \answerTODO{}
432
+  \item Have you read the ethics review guidelines and ensured that your paper conforms to them?
433
+    \answerTODO{}
434
+\end{enumerate}
435
+
436
+\item If you are including theoretical results...
437
+\begin{enumerate}
438
+  \item Did you state the full set of assumptions of all theoretical results?
439
+    \answerTODO{}
440
+	\item Did you include complete proofs of all theoretical results?
441
+    \answerTODO{}
442
+\end{enumerate}
443
+
444
+\item If you ran experiments...
445
+\begin{enumerate}
446
+  \item Did you include the code, data, and instructions needed to reproduce the main experimental results (either in the supplemental material or as a URL)?
447
+    \answerTODO{}
448
+  \item Did you specify all the training details (e.g., data splits, hyperparameters, how they were chosen)?
449
+    \answerTODO{}
450
+	\item Did you report error bars (e.g., with respect to the random seed after running experiments multiple times)?
451
+    \answerTODO{}
452
+	\item Did you include the total amount of compute and the type of resources used (e.g., type of GPUs, internal cluster, or cloud provider)?
453
+    \answerTODO{}
454
+\end{enumerate}
455
+
456
+\item If you are using existing assets (e.g., code, data, models) or curating/releasing new assets...
457
+\begin{enumerate}
458
+  \item If your work uses existing assets, did you cite the creators?
459
+    \answerTODO{}
460
+  \item Did you mention the license of the assets?
461
+    \answerTODO{}
462
+  \item Did you include any new assets either in the supplemental material or as a URL?
463
+    \answerTODO{}
464
+  \item Did you discuss whether and how consent was obtained from people whose data you're using/curating?
465
+    \answerTODO{}
466
+  \item Did you discuss whether the data you are using/curating contains personally identifiable information or offensive content?
467
+    \answerTODO{}
468
+\end{enumerate}
469
+
470
+\item If you used crowdsourcing or conducted research with human subjects...
471
+\begin{enumerate}
472
+  \item Did you include the full text of instructions given to participants and screenshots, if applicable?
473
+    \answerTODO{}
474
+  \item Did you describe any potential participant risks, with links to Institutional Review Board (IRB) approvals, if applicable?
475
+    \answerTODO{}
476
+  \item Did you include the estimated hourly wage paid to participants and the total amount spent on participant compensation?
477
+    \answerTODO{}
478
+\end{enumerate}
479
+
480
+\end{enumerate}
481
+
482
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
483
+
484
+\appendix
485
+
486
+\section{Appendix}
487
+
488
+Optionally include extra information (complete proofs, additional experiments and plots) in the appendix.
489
+This section will often be part of the supplemental material.
490
+
491
+\end{document}

+ 230
- 0
doc/projectsubmission2022/projectsubmission.tex 查看文件

@@ -0,0 +1,230 @@
1
+\documentclass{article}
2
+
3
+% if you need to pass options to natbib, use, e.g.:
4
+%     \PassOptionsToPackage{numbers, compress}{natbib}
5
+% before loading neurips_2021
6
+
7
+\bibliographystyle{unsrtnat}
8
+\PassOptionsToPackage{numbers, compress}{natbib}
9
+% ready for submission
10
+ 
11
+ \usepackage[preprint]{neurips_2021}
12
+%\usepackage[nonatbib,preprint]{neurips_2021}
13
+
14
+% to compile a preprint version, e.g., for submission to arXiv, add the
15
+% [preprint] option:
16
+%     \usepackage[preprint]{neurips_2021}
17
+
18
+% to compile a camera-ready version, add the [final] option, e.g.:
19
+%     \usepackage[final]{neurips_2021}
20
+
21
+% to avoid loading the natbib package, add option nonatbib:
22
+%    \usepackage[nonatbib]{neurips_2021}
23
+
24
+\usepackage[utf8]{inputenc} % allow utf-8 input
25
+\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
26
+\usepackage[colorlinks=true]{hyperref}       % hyperlinks
27
+\usepackage{url}            % simple URL typesetting
28
+\usepackage{booktabs}       % professional-quality tables
29
+\usepackage{amsfonts}       % blackboard math symbols
30
+\usepackage{nicefrac}       % compact symbols for 1/2, etc.
31
+\usepackage{microtype}      % microtypography
32
+\usepackage{xcolor}         % colors
33
+\usepackage{graphicx} %package to manage images
34
+\usepackage[nodayofweek,level]{datetime}
35
+\usepackage{adjustbox}
36
+
37
+\title{Analyzing Gender Share\\in Casting Actors}
38
+
39
+% The \author macro works with any number of authors. There are two commands
40
+% used to separate the names and addresses of multiple authors: \And and \AND.
41
+%
42
+% Using \And between authors leaves it to LaTeX to determine where to break the
43
+% lines. Using \AND forces a line break at that point. So, if LaTeX puts 3 of 4
44
+% authors names on the first line, and the last on the second line, try using
45
+% \AND instead of \And before the third author name.
46
+
47
+\author{%
48
+  Sophia Herrmann\\
49
+  Matrikelnummer 5688690\\
50
+  \texttt{so.herrmann@student.uni-tuebingen.de} \\
51
+  \And
52
+  Tobias Stumpp\\
53
+  Matrikelnummer 3798377\\
54
+  \texttt{tobias.stumpp@student.uni-tuebingen.de} \\
55
+}
56
+
57
+\begin{document}
58
+
59
+\maketitle
60
+
61
+\begin{abstract}
62
+  We use the datasets on \href{https://datasets.imdbws.com/title.principals.tsv.gz}{film-principals}, \href{https://datasets.imdbws.com/title.basics.tsv.gz}{film-titles}, and \href{https://datasets.imdbws.com/title.ratings.tsv.gz}{film-ratings} from the \href{https://imdb.com}{IMDb}~\citep{imdbiface,imdbws} to examine how the female share of the principal cast has changed over the years. We want to look at when and in which genres the gender share has changed. We also want to see whether film ratings and genres correlate with the gender share and, if applicable, how well the film rating can be predicted.
63
+\end{abstract}
64
+
65
+% - Wieso ist Gender Share/unsere Fragestellung von Interesse.
66
+%  - Bendchtel-Test-Ersatz
67
+%  - Fragen
68
+%    - "Bendchtel-Test hat Schlagzeilen um 2000 gemacht." Hat sich seither etwas verändert?
69
+%    - "Filme, die den Bendcheltest bestehen wären erfolgreicher." Stimmt das?
70
+%  - Ziel (kurz)
71
+%     - Wir untersuchen "Frage 1" mit, wollen Ergebnis ob..
72
+%     - Wir untersuchen "Frage 2" mit, wollen Ergebnis ob..
73
+% - Welche Daten haben wir
74
+%   - Datenvorstellung IMDb
75
+%   - Übersicht der Features
76
+% - Methoden
77
+%   - Beschreibung
78
+%   - (Vorraussetzungen/Assumptions)
79
+% - Analyse & Ergebnisse 
80
+%   - Datenanalyse
81
+%   - Statistiktests
82
+% - Probleme/Limitations
83
+% - Resümee
84
+
85
+
86
+\section{Impact of Bechdel test on the female share in principal cast}
87
+\label{sect_intro}
88
+
89
+In the context of gender equality, and inspired by the Bechdel test and a possible impact of the test, we aim to examine the gender balance in principal roles in movies by using IMDb data~\citep{imdbiface,imdbws} on movie casting.
90
+
91
+The Bechdel test is an indicator of active female roles in fiction. The basis for the test as understood today goes back to a comic strip from 1985, with criteria that can also be derived from the narrative: A woman explains that she will only go to movies that (1) feature at least two women (2) talking to each other (3) about something other than a man~\citep{bechdeltestwikien,dtwofblog}.
92
+The English Wikipedia page on the Bechdel test mentions two statements that we would like to examine within the limits of our data analysis:
93
+
94
+\begin{enumerate}
95
+    \item ``the test became more widely discussed in the 2000s''~\citep{bechdeltestwikien,bechdeltestgoogletrends}\\
96
+    We test: Did the proportion of women in principal roles in movies change after the year 2000?
97
+    \item ``the films that passed the test had about a 37 percent higher return on investment (ROI)''\\
98
+    We test: Does the proportion of women in principal roles correlate with movie success?~\citep{bechdeltestwikien,fivethirtyeightexclusionwomen}
99
+\end{enumerate}
100
+
101
+We assume that the media attention the Bechdel test received in the 2000s led both to an increase in the popularity of movies with a higher female share in the principal cast and to a trend in the movie industry to cast more actresses in principal roles. This gives us an incentive for further analysis of possible observable patterns in the share of women in the principal cast and in the popularity of movies. We interpret 2000 as the critical year for a significant shift.
102
+
103
+In line with these assumptions, we (1) test for a significant change in the share of actresses in principal roles around the year 2000, and (2) analyze the correlation and predictability between the share of actresses and the average rating, as a measure of popularity, for the years after 2000.
104
+
105
+% - Welche Daten haben wir 
106
+
107
+\section{Dataset description and preprocessing}
108
+\label{sect_dataset}
109
+We analyze data from the Internet Movie Database (IMDb), which provides a public subset of its data for research purposes. As an online platform, the IMDb lets users retrieve and file detailed information on movies, television series, video productions, and computer games. The public subset of API-retrievable IMDb data includes movies from 1890 to the present day and is regenerated daily. We make use of the files and features shown in Table~\ref{feature_table}.
110
+
111
+\begin{table}
112
+  \caption{Files and features in use}
113
+  \label{feature_table}
114
+  \centering
115
+  \begin{adjustbox}{width=\columnwidth,center}
116
+  \begin{tabular}{lllp{12cm}}
117
+    \toprule
118
+    
119
+    File     & Feature & Type & Description \\
120
+    \midrule
121
+    film-principals\footnote{\url{https://datasets.imdbws.com/title.principals.tsv.gz}}
122
+    & tconst     & (string)  & alphanumeric unique identifier of the title \\ \cmidrule(r){2-4}
123
+    & nconst     & (string)  & alphanumeric unique identifier of the name/person \\ \cmidrule(r){2-4}
124
+    & category   & (string)  & the category of job that person was in \\
125
+    
126
+    \hline
127
+
128
+    film-titles\footnote{\url{https://datasets.imdbws.com/title.basics.tsv.gz}}
129
+    & tconst         & (string)       & alphanumeric unique identifier of the title \\ \cmidrule(r){2-4}
130
+    & titleType      & (string)       & the type/format of the title (e.g. movie, short, tvseries, tvepisode, video, etc) \\ \cmidrule(r){2-4}
131
+    & startYear      & (YYYY)         & represents the release year of a title. In the case of TV Series, it is the series start year \\ \cmidrule(r){2-4}
132
+    & runtimeMinutes & (integer)      & primary runtime of the title, in minutes \\ \cmidrule(r){2-4}
133
+    & genres         & (string array) & includes up to three genres associated with the title \\
134
+    
135
+    \hline
136
+    
137
+    film-ratings \footnote{\url{https://datasets.imdbws.com/title.ratings.tsv.gz}}
138
+    & tconst         & (string)  & alphanumeric unique identifier of the title \\ \cmidrule(r){2-4}
139
+    & averageRating  & (float)   & weighted average of all the individual user ratings \\ \cmidrule(r){2-4}
140
+    & numVotes       & (integer) & number of votes the title has received\\
141
+    
142
+    \bottomrule
143
+  \end{tabular}
144
+  \end{adjustbox}
145
+\end{table}
146
+
147
+
148
+\label{sect_preprocessing}
149
+Our download from \formatdate{30}{1}{2022} captures 77,838,777 movies, which we preprocess in several steps:
150
+
151
+\begin{itemize}
152
+    \item We consider only movies within the time frame from 1980 to 2020.
153
+    
154
+    \item We filter movies by the feature \emph{movie duration}. Some movies have a duration of only a few minutes. At the other extreme, some movies have a duration of over 1000 minutes. To filter likely lower-quality movies out of the dataset, movies with a duration above the 95\% quantile [135 min] or below the 5\% quantile [52 min] are removed and therefore ignored in our analysis.
155
+    
156
+    \item We only keep relevant features: The movie id (tconst), the movie release year (startYear), genres, the movie duration (runtimeMinutes), category (indicating if the movie contains actor(s) and/or actress(es) in the principal cast).
157
+    
158
+    \item We functionally derive dependent data, i.e., we derive the share and proportion of actresses in the principal cast for each movie. We derive the proportion from the absolute numbers of actresses and actors.
159
+\end{itemize}
160
+
161
+
162
+For the second analysis, only the time frame between 2000 and 2020 was considered. Therefore, the data set shrinks to a size of 880,209 movies. Additionally, the feature genre had to be preprocessed further. Genre covers 951 different entries, as the majority of movies present genre overlaps such as Drama-Comedy or Drama-Thriller-Horror. Keeping all of those 951 genres as dummy variables is messy. Splitting those overlaps of genres and allowing movies to have several genres would lead to dependencies. Hence, for further analysis, only movies that belong to a single genre were considered (number of single genres = 24, new data set size = 43,680). This approach could also reveal that movies that are strictly assigned to one genre differ a lot in their features from movies of other genres.
163
+
164
+
165
+% - Methoden
166
+\section{Methods}
167
+\label{sect_methods}
168
+\begin{figure}
169
+  \centering
170
+  %\fbox{\rule[-.5cm]{0cm}{4cm} \rule[-.5cm]{4cm}{0cm}}
171
+  \includegraphics[width=1\textwidth]{fig-001_Share-in-principal-cast-of-actresses-in-all-movies-1980-2020.png}
172
+  \caption{Share in principal cast of actresses in all movies, 1980 - 2020.}
173
+  \label{actresses_prop_figure}
174
+\end{figure}
175
+
176
+\subsubsection*{Descriptive Analysis}
177
+Firstly, we use figure~\ref{actresses_prop_figure} to get an overview of the range of dispersion of the shares of actresses on principal cast for each single year. Here, the left time frame covers the years from 1980 to 1990 (marked with blue points) and the right time frame covers the years from 2000 to 2020 (marked with orange points). Additionally, for each year the mean value over the shares of actresses on principal cast was computed and marked with green and red points.
178
+Observing differences in the share of actresses on principal cast after 2000 is difficult to evaluate. The figure presents a high variation in the shares in principal cast of actresses, hence the computed means for each year go in line with high standard deviations. Hence, a clear change in pattern in the years after 2000 against the years before 2000 cannot be identified.
179
+However, the mean values appear to be slightly higher after 2000. 
180
+To present more qualitative insights into possible differences in the share of actresses on principal cast, significance tests are implemented. 
181
+
182
+\subsubsection*{Statistical analysis}
183
+
184
+With t-testing, our goal is to find out if the mean $\mu_1$ on the proportion of actresses in principal roles from 2000-2020 differs significantly compared to the mean $\mu_0$ on the proportion of actresses in principal roles in 1980-2000.
185
+
186
+With beta-binomial-testing, we put a beta-prior on $f_0$ (the probability to experience an amount of shares) which is based on $m_0$ (the number of a share on movies in 1980-2000) in $n_0$ movies (the number of movies in 1980-2000).\\
187
+Next, under the null hypothesis $H_0: f_1 = f_0$, the number of movies with a share in 2000-2020 $m_1$ (given the number of movies in 2000-2020 $n_1$) follows a binomial distribution.\\
188
+This tells us the probability to observe $m_1$ shares for movies in 2000-2020, given the number of movies in 2000-2020 $n_1$ and the statistics $m_0$, $n_0$ for the years 1980-2000.
189
+
190
+\subsubsection*{Analyzing the relationship of the share of actresses on principal cast and average movie ratings and the suitability of linear regression models for predictive modeling}
191
+
192
+The relationship of the female share on principal cast on the average mean rating between 2000 and 2020 was analyzed by a scatter plot. Further, the linear regression model was implemented to evaluate its suitability as prediction model for the average rating on the share of actresses on the principal cast.
193
+Additionally, the impact of including the features movie duration and genre on the model fit of the linear regression was analyzed. For the latter model, only those movies were considered that cover a single genre. The genres were included as dummy variables, whereby the dummy variable for the genre "drama" was excluded due to multicollinearity. 
194
+
195
+%   - Statistiktests und regression
196
+
197
+\section{Results}
198
+\label{sect_results}
199
+
200
+With (1)~\ref{sect_intro} we want to study whether the proportion of principal roles filled by actresses differs between the periods 1980-2000 and 2000-2020. We do not find a clear indication in a visual~analysis~\ref{actresses_prop_figure}, which we assume is due to high variances and the discrete nature of the available data.
201
+
202
+The statistical tests in a non-visual analysis, more specifically the t-test and the beta-binomial-test result in insignificant p-values\footnote{\url{https://coreco.samstagskind.de/tobi/Gender-Share-in-Casting-Actors_DL-WS2122_public/src/branch/master/exp/exp-003_T-Test-Hypothesis-Testing.ipynb}}~\citep{gitrepo} except for two occasions on the beta-binomial-test that propose significance: Testing whether there are unlikely\footnote{\url{https://coreco.samstagskind.de/tobi/Gender-Share-in-Casting-Actors_DL-WS2122_public/src/branch/master/exp/exp-004_Beta-Binomial-Hypothesis-Testing.ipynb}}~\citep{gitrepo}\\
203
+\begin{itemize}
204
+    \item more movies with a majority of actresses in the principal roles.
205
+    \item fewer movies with a minority of actresses in the principal roles.
206
+\end{itemize}
207
+
208
+With (2)~\ref{sect_intro} we do not find a correlation of actress share of principal cast on average rating\footnote{\url{https://coreco.samstagskind.de/tobi/Gender-Share-in-Casting-Actors_DL-WS2122_public/src/branch/master/exp/exp-005_Relationship-Rating-and-Share-Actresses-on-principal-cast.ipynb}}~\citep{gitrepo}.
209
+Firstly, a simple scatter plot of the share of actresses on principal cast against the average rating did not present any pattern. Each value of the actress share covered almost the whole range of possible rating scores. Additionally, the Pearson correlation coefficient was computed and affirmed no meaningful linear relationship by a value of -0.07. Due to those results, the previous idea of using a linear regression model could already be stated as an unsuitable prediction model, not fulfilling model assumptions of linearity. In line with this, the linear regression model presented a bad model fit by the R-squared value of 0.005. Even though the estimated coefficient for the actress share was significant, the aim of receiving accurate predictions for average movie rating on actress share is not given by a linear regression model with a single predictor.
210
+The results of including the movie duration and genre as additional explanatory variables into the linear regression model were again unsatisfactory. The overall model fit proved to be better than in the first model, but was still bad with an R-squared of 0.22. Hence, the idea of controlling for single genres by dummy variables, and thereby probably obtaining a lower variation in the data within each single genre, did not work out. 
211
+Positively, many dummy variables were significant, which motivates further research into a possible relationship between actress share on principal cast and average rating within single genres.
212
+
213
+% - Probleme/Limitations
214
+
215
+\section{Discussion}
216
+\label{sect_discussion}
217
+ The paper does not detect a clear difference of the share of actresses on principal cast in the years before and after 2000. The significance tests provided contradictory results.
218
+ However, the use of the t test is to be questioned. The assumption of normally distributed data cannot be well fulfilled due to a more discrete pattern of the actress shares.
219
+ 
220
+ Additionally, holding on to the goal of predicting the average rating from the share of actresses on principal cast was naive. The linear regression model was unsuitable, as was the small set of predictor variables.
221
+
222
+{
223
+\small
224
+
225
+\bibliography{bibliography}
226
+
227
+}
228
+
229
+\end{document}
230
+

+ 353
- 0
exp/exp-001_Data-Preprocessing-and-Provisioning.ipynb 查看文件

@@ -0,0 +1,353 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "markdown",
5
+   "metadata": {},
6
+   "source": [
7
+    "# Data Literacy - Project\n",
8
+    "## Gender Share in Movies\n",
9
+    "#### Tobias Stumpp, Sophia Herrmann"
10
+   ]
11
+  },
12
+  {
13
+   "cell_type": "markdown",
14
+   "metadata": {},
15
+   "source": [
16
+    "### README & TODO\n",
17
+    "\n",
18
+    "Please run all cells of this ipython document once. You may use the button that's revealed by executing the next cell.  \n",
19
+    "With an execution, this document prepares and provides files as a preprocessing step for all the experiments in this repository."
20
+   ]
21
+  },
22
+  {
23
+   "cell_type": "code",
24
+   "execution_count": 1,
25
+   "metadata": {},
26
+   "outputs": [
27
+    {
28
+     "data": {
29
+      "text/plain": [
30
+       "'Please click this button below to provide the required preprocessed data files for the experiments:'"
31
+      ]
32
+     },
33
+     "metadata": {},
34
+     "output_type": "display_data"
35
+    },
36
+    {
37
+     "data": {
38
+      "application/vnd.jupyter.widget-view+json": {
39
+       "model_id": "6d505788715c424d9776b876af6b6290",
40
+       "version_major": 2,
41
+       "version_minor": 0
42
+      },
43
+      "text/plain": [
44
+       "Button(description='Run all cells below', style=ButtonStyle())"
45
+      ]
46
+     },
47
+     "metadata": {},
48
+     "output_type": "display_data"
49
+    }
50
+   ],
51
+   "source": [
52
+    "from IPython.display import Javascript, display\n",
53
+    "from ipywidgets import widgets\n",
54
+    "\n",
55
+    "def run_all(ev):\n",
56
+    "    Javascript('IPython.Application.instance().kernel.do_shutdown(True)')\n",
57
+    "    display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.ncells())'))\n",
58
+    "\n",
59
+    "display(\"Please click this button below to provide the required preprocessed data files for the experiments:\")\n",
60
+    "button = widgets.Button(description=\"Run all cells below\")\n",
61
+    "button.on_click(run_all)\n",
62
+    "display(button)"
63
+   ]
64
+  },
65
+  {
66
+   "cell_type": "code",
67
+   "execution_count": 2,
68
+   "metadata": {},
69
+   "outputs": [],
70
+   "source": [
71
+    "import numpy as np\n",
72
+    "import pandas as pd\n",
73
+    "import os"
74
+   ]
75
+  },
76
+  {
77
+   "cell_type": "code",
78
+   "execution_count": 3,
79
+   "metadata": {},
80
+   "outputs": [],
81
+   "source": [
82
+    "path = '../'\n",
83
+    "os.chdir(path)"
84
+   ]
85
+  },
86
+  {
87
+   "cell_type": "markdown",
88
+   "metadata": {},
89
+   "source": [
90
+    "### Extract data archive files"
91
+   ]
92
+  },
93
+  {
94
+   "cell_type": "code",
95
+   "execution_count": 4,
96
+   "metadata": {},
97
+   "outputs": [],
98
+   "source": [
99
+    "import gzip\n",
100
+    "import shutil\n",
101
+    "\n",
102
+    "files = [\n",
103
+    "    'dat/title.basics.tsv.gz',\n",
104
+    "    'dat/title.principals.tsv.gz',\n",
105
+    "    'dat/title.ratings.tsv.gz',\n",
106
+    "]\n",
107
+    "\n",
108
+    "def unzip(files=files):\n",
109
+    "    for file in files:\n",
110
+    "        if file.endswith('.gz'):\n",
111
+    "            with gzip.open(file, 'rb') as f_in:\n",
112
+    "                with open(file[:-3], 'wb') as f_out:\n",
113
+    "                    shutil.copyfileobj(f_in, f_out)\n",
114
+    "\n",
115
+    "unzip(files)"
116
+   ]
117
+  },
118
+  {
119
+   "cell_type": "markdown",
120
+   "metadata": {},
121
+   "source": [
122
+    "### Read data files"
123
+   ]
124
+  },
125
+  {
126
+   "cell_type": "code",
127
+   "execution_count": 5,
128
+   "metadata": {},
129
+   "outputs": [],
130
+   "source": [
131
+    "data_film = pd.read_csv('dat/title.basics.tsv', sep='\\t', na_values=['\\\\N'], dtype={\n",
132
+    "    \"isAdult\": bool,\n",
133
+    "    \"startYear\": float,\n",
134
+    "    \"endYear\": float, \n",
135
+    "    \"runtimeMinutes\": float,\n",
136
+    "    },\n",
137
+    "    # Skip lines that are syntactically incorrect and would therefore cause\n",
138
+    "    # - a column shift within the row\n",
139
+    "    # - assignment errors for column datatypes\n",
140
+    "    skiprows=[\n",
141
+    "        1098292,\n",
142
+    "        1510501,\n",
143
+    "        1900901,\n",
144
+    "        2012237,\n",
145
+    "        2167663,\n",
146
+    "        2313911,\n",
147
+    "        3012068,\n",
148
+    "        5964307,\n",
149
+    "        8605235,\n",
150
+    "        8645208,\n",
151
+    "    ]\n",
152
+    ")\n",
153
+    "# tconst (string) - alphanumeric unique identifier of the title\n",
154
+    "# titleType (string) – the type/format of the title (e.g. movie, short, tvseries, tvepisode, video, etc)\n",
155
+    "# primaryTitle (string) – the more popular title / the title used by the filmmakers on promotional materials at the point of release\n",
156
+    "# originalTitle (string) - original title, in the original language\n",
157
+    "# isAdult (boolean) - 0: non-adult title; 1: adult title\n",
158
+    "# startYear (YYYY) – represents the release year of a title. In the case of TV Series, it is the series start year\n",
159
+    "# endYear (YYYY) – TV Series end year. ‘\\N’ for all other title types\n",
160
+    "# runtimeMinutes – primary runtime of the title, in minutes\n",
161
+    "# genres (string array) – includes up to three genres associated with the title\n",
162
+    "\n",
163
+    "data_rating = data = pd.read_csv('dat/title.ratings.tsv', sep='\\t', na_values=['\\\\N'], dtype={\n",
164
+    "    \"averageRating\": float,\n",
165
+    "    \"numVotes\": float,\n",
166
+    "})\n",
167
+    "# tconst (string) - alphanumeric unique identifier of the title\n",
168
+    "# averageRating – weighted average of all the individual user ratings\n",
169
+    "# numVotes - number of votes the title has received\n",
170
+    "\n",
171
+    "data_principals = pd.read_csv('dat/title.principals.tsv', sep='\\t', na_values=['\\\\N'], dtype={\n",
172
+    "    \"ordering\": float,\n",
173
+    "})\n",
174
+    "# tconst (string) - alphanumeric unique identifier of the title\n",
175
+    "# ordering (integer) – a number to uniquely identify rows for a given titleId\n",
176
+    "# nconst (string) - alphanumeric unique identifier of the name/person\n",
177
+    "# category (string) - the category of job that person was in\n",
178
+    "# job (string) - the specific job title if applicable, else '\\N'\n",
179
+    "# characters (string) - the name of the character played if applicable, else '\\N'"
180
+   ]
181
+  },
182
+  {
183
+   "cell_type": "markdown",
184
+   "metadata": {},
185
+   "source": [
186
+    "----------\n",
187
+    "\n",
188
+    "###  Clean and merge original data into prepared datasets for experiments"
189
+   ]
190
+  },
191
+  {
192
+   "cell_type": "code",
193
+   "execution_count": 6,
194
+   "metadata": {},
195
+   "outputs": [
196
+    {
197
+     "data": {
198
+      "text/plain": [
199
+       "'Initially, the dataset contains 600289 movies.'"
200
+      ]
201
+     },
202
+     "metadata": {},
203
+     "output_type": "display_data"
204
+    },
205
+    {
206
+     "data": {
207
+      "text/plain": [
208
+       "'Quantiles 5% and 95% on runtime minutes yield as delimitation minutes [52.0, 135.0].'"
209
+      ]
210
+     },
211
+     "metadata": {},
212
+     "output_type": "display_data"
213
+    },
214
+    {
215
+     "data": {
216
+      "text/plain": [
217
+       "'After dropping rows of these quantiles, the dataset contains 341225 movies, which is 259064 less movies.'"
218
+      ]
219
+     },
220
+     "metadata": {},
221
+     "output_type": "display_data"
222
+    }
223
+   ],
224
+   "source": [
225
+    "# Keep only higher quality movies, hence,\n",
226
+    "# - drop rows whose types aren't movies\n",
227
+    "data_film.drop(data_film.index[(data_film[\"titleType\"] != \"movie\")], axis = 0, inplace=True)\n",
228
+    "\n",
229
+    "# - drop rows with *untypical runtime minutes*\n",
230
+    "movies_count_before = data_film.shape[0]\n",
231
+    "quantile = data_film[\"runtimeMinutes\"].quantile([0.05,0.95])\n",
232
+    "\n",
233
+    "data_film = data_film[\n",
234
+    "    (data_film[\"runtimeMinutes\"] >= quantile[0.05]) &\n",
235
+    "    (data_film[\"runtimeMinutes\"] <= quantile[0.95])\n",
236
+    "]\n",
237
+    "\n",
238
+    "movies_count_after = data_film.shape[0]\n",
239
+    "\n",
240
+    "display(f\"Initially, the dataset contains {movies_count_before} movies.\")\n",
241
+    "display(f\"Quantiles 5% and 95% on runtime minutes yield as delimitation minutes {list(quantile)}.\")\n",
242
+    "display(f\"After dropping rows of these quantiles, the dataset contains {movies_count_after} movies, which is {movies_count_before-movies_count_after} less movies.\")"
243
+   ]
244
+  },
245
+  {
246
+   "cell_type": "code",
247
+   "execution_count": 7,
248
+   "metadata": {},
249
+   "outputs": [],
250
+   "source": [
251
+    "# Drop features that for our analysis are either irrelevant or incomplete\n",
252
+    "data_film.drop([\"titleType\", \"primaryTitle\", \"originalTitle\", \"isAdult\", \"endYear\"], axis = 1, inplace=True)\n",
253
+    "data_film.dropna(subset=[\"startYear\", \"runtimeMinutes\"], inplace=True)\n",
254
+    "\n",
255
+    "data_principals.drop([\"ordering\", \"nconst\", \"job\", \"characters\"], axis = 1, inplace=True)\n",
256
+    "\n",
257
+    "# Filter principal cast members for only actors and actresses\n",
258
+    "data_principals = data_principals[\n",
259
+    "    (data_principals[\"category\"] == \"actor\") |\n",
260
+    "    (data_principals[\"category\"] == \"actress\")\n",
261
+    "]"
262
+   ]
263
+  },
264
+  {
265
+   "cell_type": "code",
266
+   "execution_count": 8,
267
+   "metadata": {},
268
+   "outputs": [],
269
+   "source": [
270
+    "# Merge movie data\n",
271
+    "data_movie = pd.merge(data_film, data_rating, how=\"inner\", on=\"tconst\")\n",
272
+    "data_movie = pd.merge(data_movie, data_principals, how=\"inner\", on=\"tconst\")"
273
+   ]
274
+  },
275
+  {
276
+   "cell_type": "code",
277
+   "execution_count": 9,
278
+   "metadata": {},
279
+   "outputs": [],
280
+   "source": [
281
+    "# Provide atomic genre data on movies\n",
282
+    "data_movie_genre = data_film.copy()\n",
283
+    "\n",
284
+    "# Drop features that for genres are irrelevant or incomplete\n",
285
+    "data_movie_genre.drop([\"startYear\", \"runtimeMinutes\"], axis=1, inplace=True)\n",
286
+    "data_movie_genre.dropna(subset=[\"genres\"], inplace=True)\n",
287
+    "\n",
288
+    "# Break down genre to atomic data\n",
289
+    "data_movie_genre[\"genres\"] = data_movie_genre[\"genres\"].str.split(\",\")\n",
290
+    "data_movie_genre = data_movie_genre.explode(\"genres\").reset_index(drop=True)\n",
291
+    "\n",
292
+    "# Correct column title to fit atomic data\n",
293
+    "data_movie_genre = data_movie_genre.rename(columns = {\"genres\": \"genre\"})"
294
+   ]
295
+  },
296
+  {
297
+   "cell_type": "markdown",
298
+   "metadata": {},
299
+   "source": [
300
+    "### Convert integer numbers to integer datatypes"
301
+   ]
302
+  },
303
+  {
304
+   "cell_type": "code",
305
+   "execution_count": 10,
306
+   "metadata": {},
307
+   "outputs": [],
308
+   "source": [
309
+    "data_movie[\"startYear\"] = data_movie[\"startYear\"].astype(int)\n",
310
+    "data_movie[\"runtimeMinutes\"] = data_movie[\"runtimeMinutes\"].astype(int)\n",
311
+    "data_movie[\"numVotes\"] = data_movie[\"numVotes\"].astype(int)"
312
+   ]
313
+  },
314
+  {
315
+   "cell_type": "markdown",
316
+   "metadata": {},
317
+   "source": [
318
+    "### Write preprocessed data to files"
319
+   ]
320
+  },
321
+  {
322
+   "cell_type": "code",
323
+   "execution_count": 11,
324
+   "metadata": {},
325
+   "outputs": [],
326
+   "source": [
327
+    "data_movie.to_csv(\"dat/data_movie.csv\", index=False)\n",
328
+    "data_movie_genre.to_csv(\"dat/data_movie_genre.csv\", index=False)"
329
+   ]
330
+  }
331
+ ],
332
+ "metadata": {
333
+  "kernelspec": {
334
+   "display_name": "Python 3",
335
+   "language": "python",
336
+   "name": "python3"
337
+  },
338
+  "language_info": {
339
+   "codemirror_mode": {
340
+    "name": "ipython",
341
+    "version": 3
342
+   },
343
+   "file_extension": ".py",
344
+   "mimetype": "text/x-python",
345
+   "name": "python",
346
+   "nbconvert_exporter": "python",
347
+   "pygments_lexer": "ipython3",
348
+   "version": "3.8.8"
349
+  }
350
+ },
351
+ "nbformat": 4,
352
+ "nbformat_minor": 4
353
+}

+ 233
- 0
exp/exp-002_Share-in-principal-cast-of-actresses-in-all-movies-1980-2020.ipynb
文件差異過大導致無法顯示
查看文件


+ 2951
- 0
exp/exp-003_T-Test-Hypothesis-Testing.ipynb
文件差異過大導致無法顯示
查看文件


+ 711
- 0
exp/exp-004_Beta-Binomial-Hypothesis-Testing.ipynb 查看文件

@@ -0,0 +1,711 @@
1
+{
2
+ "cells": [
3
+  {
4
+   "cell_type": "markdown",
5
+   "metadata": {},
6
+   "source": [
7
+    "# Data Literacy - Project\n",
8
+    "## Gender Share in Movies\n",
9
+    "#### Tobias Stumpp, Sophia Herrmann"
10
+   ]
11
+  },
12
+  {
13
+   "cell_type": "markdown",
14
+   "metadata": {},
15
+   "source": [
16
+    "## Beta-Binomial Hypothesis Testing"
17
+   ]
18
+  },
19
+  {
20
+   "cell_type": "markdown",
21
+   "metadata": {},
22
+   "source": [
23
+    "### Parameters"
24
+   ]
25
+  },
26
+  {
27
+   "cell_type": "code",
28
+   "execution_count": null,
29
+   "metadata": {},
30
+   "outputs": [],
31
+   "source": [
32
+    "# Starting year of the period of years covered by the test\n",
33
+    "start_year = 1980\n",
34
+    "# Ending year of the period of years covered by the test\n",
35
+    "end_year = start_year + 40\n",
36
+    "\n",
37
+    "# Split year of the period of years covered by the test that separates\n",
38
+    "# indicative data (>= start_year and < split_year)\n",
39
+    "# from\n",
40
+    "# data to be verified (>= split_year and < end_year).\n",
41
+    "split_year = start_year + 20\n",
42
+    "\n",
43
+    "# Option to ignore movies where the average rating or the number of votes is below the respective 5% quantile.\n",
44
+    "ignore_irrelevant_movies = False"
45
+   ]
46
+  },
47
+  {
48
+   "cell_type": "markdown",
49
+   "metadata": {},
50
+   "source": [
51
+    "### Meta"
52
+   ]
53
+  },
54
+  {
55
+   "cell_type": "code",
56
+   "execution_count": null,
57
+   "metadata": {},
58
+   "outputs": [],
59
+   "source": [
60
+    "import numpy as np\n",
61
+    "import pandas as pd\n",
62
+    "import os\n",
63
+    "import matplotlib.pyplot as plt"
64
+   ]
65
+  },
66
+  {
67
+   "cell_type": "code",
68
+   "execution_count": null,
69
+   "metadata": {},
70
+   "outputs": [],
71
+   "source": [
72
+    "path = '../dat/'\n",
73
+    "os.chdir(path)"
74
+   ]
75
+  },
76
+  {
77
+   "cell_type": "markdown",
78
+   "metadata": {},
79
+   "source": [
80
+    "### Read Data"
81
+   ]
82
+  },
83
+  {
84
+   "cell_type": "code",
85
+   "execution_count": null,
86
+   "metadata": {},
87
+   "outputs": [],
88
+   "source": [
89
+    "columns = list(pd.read_csv('data_movie.csv', nrows=1))\n",
90
+    "print(columns)"
91
+   ]
92
+  },
93
+  {
94
+   "cell_type": "code",
95
+   "execution_count": null,
96
+   "metadata": {},
97
+   "outputs": [],
98
+   "source": [
99
+    "columns_to_read = [c for c in columns if c != 'genres']\n",
100
+    "\n",
101
+    "data_movie = pd.read_csv('data_movie.csv', usecols = columns_to_read)\n",
102
+    "\n",
103
+    "display(data_movie.info())\n",
104
+    "display(data_movie.head())"
105
+   ]
106
+  },
107
+  {
108
+   "cell_type": "markdown",
109
+   "metadata": {},
110
+   "source": [
111
+    "---"
112
+   ]
113
+  },
114
+  {
115
+   "cell_type": "markdown",
116
+   "metadata": {},
117
+   "source": [
118
+    "#### Provide the option to only include movies that are relevant based on the average rating and number of votes."
119
+   ]
120
+  },
121
+  {
122
+   "cell_type": "code",
123
+   "execution_count": null,
124
+   "metadata": {},
125
+   "outputs": [],
126
+   "source": [
127
+    "data_movie[['numVotes','averageRating']].describe()"
128
+   ]
129
+  },
130
+  {
131
+   "cell_type": "code",
132
+   "execution_count": null,
133
+   "metadata": {},
134
+   "outputs": [],
135
+   "source": [
136
+    "numVotes_split = data_movie['numVotes'].quantile(0.05)\n",
137
+    "numVotes_split"
138
+   ]
139
+  },
140
+  {
141
+   "cell_type": "code",
142
+   "execution_count": null,
143
+   "metadata": {},
144
+   "outputs": [],
145
+   "source": [
146
+    "averageRating_split = data_movie['averageRating'].quantile(0.05)\n",
147
+    "averageRating_split"
148
+   ]
149
+  },
150
+  {
151
+   "cell_type": "code",
152
+   "execution_count": null,
153
+   "metadata": {},
154
+   "outputs": [],
155
+   "source": [
156
+    "display(data_movie.shape)"
157
+   ]
158
+  },
159
+  {
160
+   "cell_type": "code",
161
+   "execution_count": null,
162
+   "metadata": {},
163
+   "outputs": [],
164
+   "source": [
165
+    "if ignore_irrelevant_movies:\n",
166
+    "    data_movie = data_movie[(data_movie['numVotes'] > numVotes_split) & (data_movie['averageRating'] > averageRating_split)]"
167
+   ]
168
+  },
169
+  {
170
+   "cell_type": "code",
171
+   "execution_count": null,
172
+   "metadata": {},
173
+   "outputs": [],
174
+   "source": [
175
+    "display(data_movie.shape)"
176
+   ]
177
+  },
178
+  {
179
+   "cell_type": "markdown",
180
+   "metadata": {},
181
+   "source": [
182
+    "---"
183
+   ]
184
+  },
185
+  {
186
+   "cell_type": "markdown",
187
+   "metadata": {},
188
+   "source": [
189
+    "#### Only include the data to movies of the selected range of years."
190
+   ]
191
+  },
192
+  {
193
+   "cell_type": "code",
194
+   "execution_count": null,
195
+   "metadata": {},
196
+   "outputs": [],
197
+   "source": [
198
+    "display(data_movie.shape)"
199
+   ]
200
+  },
201
+  {
202
+   "cell_type": "code",
203
+   "execution_count": null,
204
+   "metadata": {},
205
+   "outputs": [],
206
+   "source": [
207
+    "data_movie = data_movie[(data_movie['startYear'] >= start_year) & (data_movie['startYear'] < end_year)]"
208
+   ]
209
+  },
210
+  {
211
+   "cell_type": "code",
212
+   "execution_count": null,
213
+   "metadata": {},
214
+   "outputs": [],
215
+   "source": [
216
+    "display(data_movie.shape)"
217
+   ]
218
+  },
219
+  {
220
+   "cell_type": "markdown",
221
+   "metadata": {},
222
+   "source": [
223
+    "### Prepare Data"
224
+   ]
225
+  },
226
+  {
227
+   "cell_type": "markdown",
228
+   "metadata": {},
229
+   "source": [
230
+    "##### Add year span as a column"
231
+   ]
232
+  },
233
+  {
234
+   "cell_type": "code",
235
+   "execution_count": null,
236
+   "metadata": {},
237
+   "outputs": [],
238
+   "source": [
239
+    "year_span_presplit = f\"{start_year}-{split_year}\"\n",
240
+    "year_span_postsplit = f\"{split_year}-{end_year}\"\n",
241
+    "year_span = np.where(data_movie['startYear'] < split_year, year_span_presplit, year_span_postsplit)\n",
242
+    "data_movie.insert(1, 'year_span' , year_span)\n",
243
+    "\n",
244
+    "display(data_movie)"
245
+   ]
246
+  },
247
+  {
248
+   "cell_type": "markdown",
249
+   "metadata": {},
250
+   "source": [
251
+    "##### Add counts and proportions on crew members"
252
+   ]
253
+  },
254
+  {
255
+   "cell_type": "code",
256
+   "execution_count": null,
257
+   "metadata": {},
258
+   "outputs": [],
259
+   "source": [
260
+    "data_cast_numbers = pd.crosstab(data_movie['tconst'], data_movie['category']).reset_index().rename(columns = {\n",
261
+    "    'actor':'num_actors',\n",
262
+    "    'actress':'num_actresses',\n",
263
+    "})\n",
264
+    "\n",
265
+    "data_cast_proportion = data_movie.groupby(['tconst'])['category'].value_counts(normalize=True).unstack().reset_index().fillna(0).rename(columns = {\n",
266
+    "    'actor':'prop_actors',\n",
267
+    "    'actress':'prop_actresses',\n",
268
+    "})\n",
269
+    "\n",
270
+    "data_cast_gender_stat = pd.merge(data_cast_numbers, data_cast_proportion)\n",
271
+    "data_cast_gender_stat"
272
+   ]
273
+  },
274
+  {
275
+   "cell_type": "code",
276
+   "execution_count": null,
277
+   "metadata": {},
278
+   "outputs": [],
279
+   "source": [
280
+    "data_movie_distinct = data_movie.drop(columns=['category']).drop_duplicates(['tconst']).reset_index(drop = True)\n",
281
+    "display(data_movie_distinct)\n",
282
+    "\n",
283
+    "data_movie_gender_stat = pd.merge(data_movie_distinct, data_cast_gender_stat)\n",
284
+    "data_movie_gender_stat.groupby('year_span').apply(display)"
285
+   ]
286
+  },
287
+  {
288
+   "cell_type": "markdown",
289
+   "metadata": {},
290
+   "source": [
291
+    "---"
292
+   ]
293
+  },
294
+  {
295
+   "cell_type": "markdown",
296
+   "metadata": {},
297
+   "source": [
298
+    "##### Add counts on proportions of actresses relative to actors"
299
+   ]
300
+  },
301
+  {
302
+   "cell_type": "code",
303
+   "execution_count": null,
304
+   "metadata": {},
305
+   "outputs": [],
306
+   "source": [
307
+    "data_movie_gender_stat['num_actresses_>_num_actors'] = (data_movie_gender_stat['num_actresses'] > data_movie_gender_stat['num_actors'])\n",
308
+    "data_movie_gender_stat['num_actresses_=_num_actors'] = (data_movie_gender_stat['num_actresses'] == data_movie_gender_stat['num_actors'])\n",
309
+    "data_movie_gender_stat['num_actresses_<_num_actors'] = (data_movie_gender_stat['num_actresses'] < data_movie_gender_stat['num_actors'])\n",
310
+    "\n",
311
+    "data_movie_gender_stat['num_actresses_=_0'] = (data_movie_gender_stat['num_actresses'] == 0)\n",
312
+    "data_movie_gender_stat['num_actresses_>_0'] = (data_movie_gender_stat['num_actresses'] > 0)\n",
313
+    "\n",
314
+    "data_movie_gender_stat"
315
+   ]
316
+  },
317
+  {
318
+   "cell_type": "code",
319
+   "execution_count": null,
320
+   "metadata": {},
321
+   "outputs": [],
322
+   "source": [
323
+    "data_actresses_stat = data_movie_gender_stat.groupby(['year_span','startYear'])[[\n",
324
+    "    'num_actresses_>_num_actors',\n",
325
+    "    'num_actresses_=_num_actors',\n",
326
+    "    'num_actresses_<_num_actors',\n",
327
+    "    'num_actresses_=_0',\n",
328
+    "    'num_actresses_>_0',\n",
329
+    "]].sum().reset_index()\n",
330
+    "\n",
331
+    "data_actresses_stat['num_movies'] = (\n",
332
+    "    data_actresses_stat['num_actresses_>_num_actors'] +\n",
333
+    "    data_actresses_stat['num_actresses_=_num_actors'] +\n",
334
+    "    data_actresses_stat['num_actresses_<_num_actors']\n",
335
+    ")\n",
336
+    "\n",
337
+    "data_actresses_stat"
338
+   ]
339
+  },
340
+  {
341
+   "cell_type": "markdown",
342
+   "metadata": {},
343
+   "source": [
344
+    "---"
345
+   ]
346
+  },
347
+  {
348
+   "cell_type": "markdown",
349
+   "metadata": {},
350
+   "source": [
351
+    "##### Split data into their year spans"
352
+   ]
353
+  },
354
+  {
355
+   "cell_type": "code",
356
+   "execution_count": null,
357
+   "metadata": {},
358
+   "outputs": [],
359
+   "source": [
360
+    "data_actresses_stat_timespan_presplit, data_actresses_stat_timespan_postsplit = [\n",
361
+    "    g.reset_index(drop=True) for _, g in data_actresses_stat.groupby(['year_span'])\n",
362
+    "]"
363
+   ]
364
+  },
365
+  {
366
+   "cell_type": "code",
367
+   "execution_count": null,
368
+   "metadata": {},
369
+   "outputs": [],
370
+   "source": [
371
+    "display(data_actresses_stat_timespan_presplit)\n",
372
+    "display(data_actresses_stat_timespan_postsplit)"
373
+   ]
374
+  },
375
+  {
376
+   "cell_type": "code",
377
+   "execution_count": null,
378
+   "metadata": {},
379
+   "outputs": [],
380
+   "source": [
381
+    "display(data_actresses_stat_timespan_presplit.describe())\n",
382
+    "display(data_actresses_stat_timespan_postsplit.describe())"
383
+   ]
384
+  },
385
+  {
386
+   "cell_type": "markdown",
387
+   "metadata": {},
388
+   "source": [
389
+    "---"
390
+   ]
391
+  },
392
+  {
393
+   "cell_type": "code",
394
+   "execution_count": null,
395
+   "metadata": {},
396
+   "outputs": [],
397
+   "source": [
398
+    "data_actresses_stat_sum = data_actresses_stat.drop(columns=['startYear']).groupby(['year_span']).sum()\n",
399
+    "data_actresses_stat_sum"
400
+   ]
401
+  },
402
+  {
403
+   "cell_type": "markdown",
404
+   "metadata": {},
405
+   "source": [
406
+    "---"
407
+   ]
408
+  },
409
+  {
410
+   "cell_type": "markdown",
411
+   "metadata": {},
412
+   "source": [
413
+    "### Analyze Data"
414
+   ]
415
+  },
416
+  {
417
+   "cell_type": "markdown",
418
+   "metadata": {},
419
+   "source": [
420
+    "#### Compute p-Values\n",
421
+    "\n",
422
+    "Our goal is to find out whether there are significantly more movies with a *majority share* of actresses, or significantly fewer movies with a *minority share* of actresses, in the principal cast after the split year than before the split year.  \n",
423
+    "We perform a beta-binomial test, closely following the example on German Bundesliga scores presented in the lecture and exercise."
424
+   ]
425
+  },
426
+  {
427
+   "cell_type": "markdown",
428
+   "metadata": {},
429
+   "source": [
430
+    "- First, we put a beta prior on $f_0$ (the probability that a movie has a majority share before the split year), which is based on observing $m_0$ movies with a majority share among the $n_0$ movies before the split year.\n",
431
+    "\n",
432
+    "- Under the null hypothesis $H_0: f_1 = f_0$, the number of movies with majority share after the split year $m_1$ (given the number of movies after the split year $n_1$) follows a binomial distribution. \n",
433
+    "\n",
434
+    "- Putting these building blocks together, we obtain a [beta-binomial distribution](https://en.wikipedia.org/wiki/Beta-binomial_distribution)\n",
435
+    "\n",
436
+    "    \\begin{equation}\n",
437
+    "    p(m_1 \\vert n_1, m_0, n_0) \n",
438
+    "    = {n_1\\choose m_1} \n",
439
+    "    \\frac{\\mathcal{B}(m_0 + m_1 + 1, (n_0-m_0) + (n_1-m_1) + 1)}\n",
440
+    "    {\\mathcal{B}(m_0 + 1, n_0 - m_0 + 1)}.\n",
441
+    "    \\end{equation}\n",
442
+    "\n",
443
+    "    This gives the probability of observing $m_1$ movies with a *majority share* after the split year, given the number of movies after the split year $n_1$ and the statistics $m_0$, $n_0$ for the years before."
444
+   ]
445
+  },
446
+  {
447
+   "cell_type": "code",
448
+   "execution_count": null,
449
+   "metadata": {},
450
+   "outputs": [],
451
+   "source": [
452
+    "from scipy.stats import betabinom"
453
+   ]
454
+  },
455
+  {
456
+   "cell_type": "code",
457
+   "execution_count": null,
458
+   "metadata": {},
459
+   "outputs": [],
460
+   "source": [
461
+    "def p_val_won(m_1, n_1, m_0, n_0):\n",
462
+    "    \"\"\"\n",
463
+    "    Compute p-value by summing the evidence p(m_1 | n_1, m_0, n_0) over the \n",
464
+    "    observed number of won movies and 'more extreme' (i.e. smaller) movie counts.\n",
465
+    "    \n",
466
+    "    Parameters\n",
467
+    "    ----------\n",
468
+    "    m_1 : int\n",
469
+    "        Number of won movies after the split year (0 <= m_1 <= n_1)\n",
470
+    "    n_1 : int\n",
471
+    "        Number of movies after the split year (n_1 > 0)\n",
472
+    "    m_0 : int\n",
473
+    "        Number of won movies before the split year (0 <= m_0 <= n_0)\n",
474
+    "    n_0 : int\n",
475
+    "        Number of movies before the split year (n_0 > 0)\n",
476
+    "    \n",
477
+    "    Result\n",
478
+    "    ------\n",
479
+    "    The probability for observing m_1 or less movies.\n",
480
+    "    \"\"\"\n",
481
+    "    return betabinom.cdf(m_1, n_1, m_0 + 1, n_0 - m_0 + 1)"
482
+   ]
483
+  },
484
+  {
485
+   "cell_type": "code",
486
+   "execution_count": null,
487
+   "metadata": {},
488
+   "outputs": [],
489
+   "source": [
490
+    "def p_val_lost(m_1, n_1, m_0, n_0):\n",
491
+    "    \"\"\"\n",
492
+    "    Compute p-value by summing the evidence p(m_1 | n_1, m_0, n_0) over the \n",
493
+    "    observed number of lost movies and 'more extreme' (i.e. larger) movie counts.\n",
494
+    "    \n",
495
+    "    Parameters\n",
496
+    "    ----------\n",
497
+    "    m_1 : int\n",
498
+    "        Number of lost movies after the split year (0 <= m_1 <= n_1)\n",
499
+    "    n_1 : int\n",
500
+    "        Number of movies after the split year (n_1 > 0)\n",
501
+    "    m_0 : int\n",
502
+    "        Number of lost movies before the split year (0 <= m_0 <= n_0)\n",
503
+    "    n_0 : int\n",
504
+    "        Number of movies before the split year (n_0 > 0)\n",
505
+    "    \n",
506
+    "    Result\n",
507
+    "    ------\n",
508
+    "    The probability for observing m_1 or more movies.\n",
509
+    "    \"\"\"\n",
510
+    "    return 1.0 - betabinom.cdf(m_1 - 1, n_1, m_0 + 1, n_0 - m_0 + 1)"
511
+   ]
512
+  },
513
+  {
514
+   "cell_type": "code",
515
+   "execution_count": null,
516
+   "metadata": {},
517
+   "outputs": [],
518
+   "source": [
519
+    "def print_result(p_val):\n",
520
+    "    alpha = 0.05\n",
521
+    "    # Significant results?\n",
522
+    "    print(f\"{'Yes' if (p_val <= alpha) else 'No'}, the result is {'significant' if (p_val <= alpha) else 'insignificant'} because given the pre-split-year data, observing the post-split-year data has a {p_val*100:.2f}% probability.\")"
523
+   ]
524
+  },
525
+  {
526
+   "cell_type": "markdown",
527
+   "metadata": {},
528
+   "source": [
529
+    "#### Are there more movies with a majority of actresses in the principal roles?"
530
+   ]
531
+  },
532
+  {
533
+   "cell_type": "code",
534
+   "execution_count": null,
535
+   "metadata": {},
536
+   "outputs": [],
537
+   "source": [
538
+    "p_val_actresses_in_majority = p_val_lost(\n",
539
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_>_num_actors'], # <---\n",
540
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
541
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_>_num_actors'],  # <---\n",
542
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
543
+    ")\n",
544
+    "\n",
545
+    "print_result(p_val_actresses_in_majority)"
546
+   ]
547
+  },
548
+  {
549
+   "cell_type": "markdown",
550
+   "metadata": {},
551
+   "source": [
552
+    "#### Are there fewer movies with a minority of actresses in the principal roles?"
553
+   ]
554
+  },
555
+  {
556
+   "cell_type": "code",
557
+   "execution_count": null,
558
+   "metadata": {},
559
+   "outputs": [],
560
+   "source": [
561
+    "p_val_actresses_in_minority = p_val_won(\n",
562
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_<_num_actors'], # <---\n",
563
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
564
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_<_num_actors'],  # <---\n",
565
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
566
+    ")\n",
567
+    "\n",
568
+    "print_result(p_val_actresses_in_minority)"
569
+   ]
570
+  },
571
+  {
572
+   "cell_type": "markdown",
573
+   "metadata": {},
574
+   "source": [
575
+    "#### Are there fewer movies with a majority of actresses in the principal roles?"
576
+   ]
577
+  },
578
+  {
579
+   "cell_type": "code",
580
+   "execution_count": null,
581
+   "metadata": {},
582
+   "outputs": [],
583
+   "source": [
584
+    "p_val_actresses_in_majority = p_val_won(\n",
585
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_>_num_actors'], # <---\n",
586
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
587
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_>_num_actors'],  # <---\n",
588
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
589
+    ")\n",
590
+    "\n",
591
+    "print_result(p_val_actresses_in_majority)"
592
+   ]
593
+  },
594
+  {
595
+   "cell_type": "markdown",
596
+   "metadata": {},
597
+   "source": [
598
+    "#### Are there more movies with a minority of actresses in the principal roles?"
599
+   ]
600
+  },
601
+  {
602
+   "cell_type": "code",
603
+   "execution_count": null,
604
+   "metadata": {},
605
+   "outputs": [],
606
+   "source": [
607
+    "p_val_actresses_in_minority = p_val_lost(\n",
608
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_<_num_actors'], # <---\n",
609
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
610
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_<_num_actors'],  # <---\n",
611
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
612
+    ")\n",
613
+    "\n",
614
+    "print_result(p_val_actresses_in_minority)"
615
+   ]
616
+  },
617
+  {
618
+   "cell_type": "markdown",
619
+   "metadata": {},
620
+   "source": [
621
+    "---"
622
+   ]
623
+  },
624
+  {
625
+   "cell_type": "markdown",
626
+   "metadata": {},
627
+   "source": [
628
+    "#### Are there fewer movies with zero actresses in the principal roles?"
629
+   ]
630
+  },
631
+  {
632
+   "cell_type": "code",
633
+   "execution_count": null,
634
+   "metadata": {},
635
+   "outputs": [],
636
+   "source": [
637
+    "p_val_actresses_eq_zero = p_val_won(\n",
638
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_=_0'], # <---\n",
639
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
640
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_=_0'],  # <---\n",
641
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
642
+    ")\n",
643
+    "\n",
644
+    "print_result(p_val_actresses_eq_zero)"
645
+   ]
646
+  },
647
+  {
648
+   "cell_type": "markdown",
649
+   "metadata": {},
650
+   "source": [
651
+    "#### Are there more movies with more than zero actresses in the principal roles?"
652
+   ]
653
+  },
654
+  {
655
+   "cell_type": "code",
656
+   "execution_count": null,
657
+   "metadata": {},
658
+   "outputs": [],
659
+   "source": [
660
+    "p_val_actresses_gt_zero = p_val_lost(\n",
661
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_actresses_>_0'], # <---\n",
662
+    "    data_actresses_stat_sum.loc[year_span_postsplit,'num_movies'],\n",
663
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_actresses_>_0'],  # <---\n",
664
+    "    data_actresses_stat_sum.loc[year_span_presplit, 'num_movies'],\n",
665
+    ")\n",
666
+    "\n",
667
+    "print_result(p_val_actresses_gt_zero)"
668
+   ]
669
+  },
670
+  {
671
+   "cell_type": "markdown",
672
+   "metadata": {},
673
+   "source": [
674
+    "### Results"
675
+   ]
676
+  },
677
+  {
678
+   "cell_type": "markdown",
679
+   "metadata": {},
680
+   "source": [
681
+    "In summary, the series of beta-binomial tests shows that there are more films with a majority of actresses and fewer films with a minority of actresses in the principal roles.  \n",
682
+    "The remaining tests did not show significance.\n",
683
+    "\n",
684
+    "We interpret the results overall as an indicator of improvement in the proportion of principal actresses.\n",
685
+    "\n",
686
+    "Note: It is difficult for us to evaluate how reliable these results are. On the one hand, we learned about the test method from a closely related example in the lecture, and we are convinced that we can apply this model to this movie cast data; on the other hand, we do not know how meaningful this result is with respect to the ratio of actors and actresses, despite the strikingly low p-values."
687
+   ]
688
+  }
689
+ ],
690
+ "metadata": {
691
+  "kernelspec": {
692
+   "display_name": "Python 3",
693
+   "language": "python",
694
+   "name": "python3"
695
+  },
696
+  "language_info": {
697
+   "codemirror_mode": {
698
+    "name": "ipython",
699
+    "version": 3
700
+   },
701
+   "file_extension": ".py",
702
+   "mimetype": "text/x-python",
703
+   "name": "python",
704
+   "nbconvert_exporter": "python",
705
+   "pygments_lexer": "ipython3",
706
+   "version": "3.8.8"
707
+  }
708
+ },
709
+ "nbformat": 4,
710
+ "nbformat_minor": 4
711
+}

+ 922
- 0
exp/exp-005_Relationship-Rating-and-Share-Actresses-on-principal-cast.ipynb
文件差異過大導致無法顯示
查看文件


Powered by TurnKey Linux.