diff --git a/notebooks/jstor-data-and-analysis.ipynb b/notebooks/jstor-data-and-analysis.ipynb index 42bde2e..10d2eea 100644 --- a/notebooks/jstor-data-and-analysis.ipynb +++ b/notebooks/jstor-data-and-analysis.ipynb @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -97,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -106,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -192,7 +192,7 @@ "3 Hardy 15.349370 23.185265 20.805369 15.021459 9.805285 11.484423" ] }, - "execution_count": 3, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -203,18 +203,18 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "vs_authors_df = vs_authors_df.melt(id_vars=[\"Author\"], \n", " var_name = \"Decade\",\n", - " value_name=\"Value\")" + " value_name=\"Percentage_of_Documents\")" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -240,7 +240,7 @@ " \n", " Author\n", " Decade\n", - " Value\n", + " Percentage_of_Documents\n", " \n", " \n", " \n", @@ -393,34 +393,34 @@ "" ], "text/plain": [ - " Author Decade Value\n", - "0 Bronte 1960s 4.238259\n", - "1 Dickens 1960s 25.429553\n", - "2 Eliot 1960s 21.534937\n", - "3 Hardy 1960s 15.349370\n", - "4 Bronte 1970s 10.292524\n", - "5 Dickens 1970s 29.902492\n", - "6 Eliot 1970s 23.618635\n", - "7 Hardy 1970s 23.185265\n", - "8 Bronte 1980s 11.409396\n", - "9 Dickens 1980s 30.285235\n", - "10 Eliot 1980s 29.194631\n", - "11 Hardy 1980s 20.805369\n", - "12 Bronte 1990s 11.659514\n", - "13 Dickens 1990s 25.894134\n", - "14 Eliot 1990s 22.031474\n", - "15 Hardy 1990s 15.021459\n", - "16 Bronte 2000s 8.901252\n", - "17 Dickens 2000s 25.173853\n", - "18 Eliot 
2000s 23.922114\n", - "19 Hardy 2000s 9.805285\n", - "20 Bronte 2010s 0.122175\n", - "21 Dickens 2010s 25.656689\n", - "22 Eliot 2010s 22.052535\n", - "23 Hardy 2010s 11.484423" + " Author Decade Percentage_of_Documents\n", + "0 Bronte 1960s 4.238259\n", + "1 Dickens 1960s 25.429553\n", + "2 Eliot 1960s 21.534937\n", + "3 Hardy 1960s 15.349370\n", + "4 Bronte 1970s 10.292524\n", + "5 Dickens 1970s 29.902492\n", + "6 Eliot 1970s 23.618635\n", + "7 Hardy 1970s 23.185265\n", + "8 Bronte 1980s 11.409396\n", + "9 Dickens 1980s 30.285235\n", + "10 Eliot 1980s 29.194631\n", + "11 Hardy 1980s 20.805369\n", + "12 Bronte 1990s 11.659514\n", + "13 Dickens 1990s 25.894134\n", + "14 Eliot 1990s 22.031474\n", + "15 Hardy 1990s 15.021459\n", + "16 Bronte 2000s 8.901252\n", + "17 Dickens 2000s 25.173853\n", + "18 Eliot 2000s 23.922114\n", + "19 Hardy 2000s 9.805285\n", + "20 Bronte 2010s 0.122175\n", + "21 Dickens 2010s 25.656689\n", + "22 Eliot 2010s 22.052535\n", + "23 Hardy 2010s 11.484423" ] }, - "execution_count": 5, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -431,23 +431,23 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "vs_authors_df['Value'] = vs_authors_df['Value']* 0.01" + "vs_authors_df['Percentage_of_Documents'] = vs_authors_df['Percentage_of_Documents']* 0.01" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "line = alt.Chart(vs_authors_df, title=\"Frequency of author references in *Victorian Studies*\").mark_line().encode(\n", " x=alt.X('Decade', title=\"Decade\",type='ordinal', sort='ascending', \n", " axis=alt.Axis(labelAngle=0, labelExpr='datum.value')), \n", - " y=alt.Y('Value:Q', title=\"Percent of Documents\", axis=alt.Axis(labelAngle=0, format=\"%\")),\n", + " y=alt.Y('Percentage_of_Documents:Q', title=\"Percent of Documents\", axis=alt.Axis(labelAngle=0, format=\"%\")),\n", " 
color=alt.Color('Author:O', scale=alt.Scale(scheme='greys'),legend=None),\n", ")\n", "\n", @@ -481,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -489,23 +489,23 @@ "text/html": [ "\n", "\n", - "
\n", + "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, - "execution_count": 8, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -569,14 +569,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "line = alt.Chart(vs_authors_df, title=\"Frequency of author references in *Victorian Studies*\").mark_line().encode(\n", " x=alt.X('Decade', title=\"Decade\",type='ordinal', sort='ascending', \n", " axis=alt.Axis(labelAngle=0, labelExpr='datum.value')), \n", - " y=alt.Y('Value:Q', title=\"Percent of Documents\", axis=alt.Axis(labelAngle=0, format=\"%\")),\n", + " y=alt.Y('Percentage_of_Documents:Q', title=\"Percent of Documents\", axis=alt.Axis(labelAngle=0, format=\"%\")),\n", " color=alt.Color('Author:O', scale=alt.Scale(scheme='category20'),legend=None),\n", ")\n", "\n", @@ -610,7 +610,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -618,23 +618,23 @@ "text/html": [ "\n", "\n", - "
\n", + "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, - "execution_count": 10, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -705,7 +705,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -714,7 +714,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -806,7 +806,7 @@ "3 6.536347 " ] }, - "execution_count": 12, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -817,34 +817,34 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "vs_titles_df = vs_titles_df.melt(id_vars=[\"Title\"], \n", " var_name = \"Decade\",\n", - " value_name=\"Value\")" + " value_name=\"Percentage_of_Documents\")" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ - "vs_titles_df['Value'] = vs_titles_df['Value']* 0.01" + "vs_titles_df['Percentage_of_Documents'] = vs_titles_df['Percentage_of_Documents']* 0.01" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "line = alt.Chart(vs_titles_df, title=\"Frequency of title references in *Victorian Studies*\").mark_line().encode(\n", " x=alt.X('Decade', title=\"Decade\",type='ordinal', sort='ascending', \n", " axis=alt.Axis(labelAngle=0, labelExpr='datum.value')), \n", - " y=alt.Y('Value:Q', title=\"Percent of Documents\", axis=alt.Axis(labelAngle=0, format=\"%\")),\n", + " y=alt.Y('Percentage_of_Documents:Q', title=\"Percent of Documents\", axis=alt.Axis(labelAngle=0, format=\"%\")),\n", " color=alt.Color('Title:O', scale=alt.Scale(scheme='greys'),legend=None),\n", ")\n", "\n", @@ -878,7 +878,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 20, "metadata": { "scrolled": true }, @@ -888,23 +888,23 @@ "text/html": [ "\n", 
"\n", - "
\n", + "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, - "execution_count": 16, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -968,14 +968,14 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "line = alt.Chart(vs_titles_df, title=\"Frequency of title references in *Victorian Studies*\").mark_line().encode(\n", " x=alt.X('Decade', title=\"Decade\",type='ordinal', sort='ascending', \n", " axis=alt.Axis(labelAngle=0, labelExpr='datum.value')), \n", - " y=alt.Y('Value:Q', title=\"Percent of Documents\", axis=alt.Axis(labelAngle=0, format=\"%\")),\n", + " y=alt.Y('Percentage_of_Documents:Q', title=\"Percent of Documents\", axis=alt.Axis(labelAngle=0, format=\"%\")),\n", " color=alt.Color('Title:O', scale=alt.Scale(scheme='category20'),legend=None),\n", ")\n", "\n", @@ -1009,7 +1009,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1017,23 +1017,23 @@ "text/html": [ "\n", "\n", - "
\n", + "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, - "execution_count": 18, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -19626,7 +19626,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Figures for \"What We Quote: Disciplinary History and the Textual Atmospheres of *Middlemarch*\" essay" + "## Figures for \"What We Quote: Disciplinary History and the Textual Atmospheres of *Middlemarch*\" essay\n", + "\n", + "Visualizations draw on the following datasets:\n", + "\n", + "+ *Victorian Studies* author and title data:\n", + " + [https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/data/VS-author-term_frequencies.csv](https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/data/VS-author-term_frequencies.csv)\n", + " + [https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/data/VS-title-term_frequencies.csv](https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/data/VS-title-term_frequencies.csv)\n", + "+ Text-matcher dataset:\n", + " + [https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/data/t2-c3-n2-m3-no-stops.json](https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/data/t2-c3-n2-m3-no-stops.json)\n", + " + [https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/data/matches.csv](https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/data/matches.csv)\n", + "+ List of journals classified as Victorianist:\n", + " + [https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/data/list-of-Victorianist-journals.csv](https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/data/list-of-Victorianist-journals.csv)\n", + " " ] }, { @@ -19635,7 +19647,7 @@ "source": [ "### Figure 1\n", "#### Frequency of author references in *Victorian Studies*\n", - "See link to [Figure 1 in
notebook](#Most-frequent-author-references-in-Victorian-Studies,-line-chart) for more context. [Color version](#Most-frequent-author-references-in-Victorian-Studies,-line-chart-(color))." + "See link to [Figure 1 context](#Most-frequent-author-references-in-Victorian-Studies,-line-chart) and [data](#Most-frequent-author-references-in-Victorian-Studies) for more details. [Color version of Figure 1](#Most-frequent-author-references-in-Victorian-Studies,-line-chart-(color))." ] }, { @@ -19732,7 +19744,7 @@ "source": [ "### Figure 2\n", "#### Frequency of novel title references in *Victorian Studies*\n", - "See link to [Figure 2 in notebook](#Most-frequent-title-references-in-Victorian-Studies,-line-chart) for more context. [Color version](#Most-frequent-title-references-in-Victorian-Studies,-line-chart-(color))" + "See link to [Figure 2 context](#Most-frequent-title-references-in-Victorian-Studies,-line-chart) and [data](#Most-frequent-title-references-in-Victorian-Studies) for more details. [Color version of Figure 2](#Most-frequent-title-references-in-Victorian-Studies,-line-chart-(color))." ] }, { @@ -19830,7 +19842,7 @@ "### Figure 3\n", "#### *Middlemarch* quotations by chapter\n", "\n", - "See link to [Figure 3 in notebook](#Number-of-quotations,-by-chapter-in-Middlemarch,-bar-chart) for more context" + "See link to [Figure 3 context](#Number-of-quotations,-by-chapter-in-Middlemarch,-bar-chart) and [data](#Number-of-quotations,-by-chapter-in-Middlemarch) for more details." ] }, { @@ -19928,7 +19940,7 @@ "### Figure 4\n", "#### Rank-frequency distribution of quotations from *Middlemarch* by chapter.\n", "\n", - "The ten most frequently quoted chapters, starting from the far left of the graph, are 20, 15, 1, 87 [Finale], 0 [Prelude], 3, 19, 10, 81, and 2.
See link to [Figure 4 in notebook](#Number-of-quotations,-by-chapter-in-Middlemarch,-bar-chart-(ranked-by-frequency)) for more context" + "The ten most frequently quoted chapters, starting from the far left of the graph, are 20, 15, 1, 87 [Finale], 0 [Prelude], 3, 19, 10, 81, and 2. See link to [Figure 4 context](#Number-of-quotations,-by-chapter-in-Middlemarch,-bar-chart-(ranked-by-frequency)) and [data](#Number-of-quotations,-by-chapter-in-Middlemarch) for more details." ] }, { @@ -20025,7 +20037,7 @@ "source": [ "### Figure 5\n", "#### *Middlemarch* quotations by chapter (normalized), diachronic heatmap\n", - "See link to [Figure 5 in notebook](#Middlemarch-quotations-per-chapter,-per-decade-(normalized-and-weighted),-heat-map) for more context" + "See link to [Figure 5 context](#Middlemarch-quotations-per-chapter,-per-decade-(normalized-and-weighted),-heat-map) and [data](#Number-of-quotations-per-chapter,-per-decade-(normalized-by-decade-and-weighted-by-word-count)) for more details." ] }, { @@ -20125,7 +20137,7 @@ "\n", "Note: Chapter 0 and 87 refer to the Prelude and Finale, respectively. Chapters not among the top five are represented in light-gray.\n", "\n", - "See link to [Figure 6 in notebook](#Middlemarch-top-5-most-frequently-quoted-chapters,-line-chart) for more context. [Color version](#Middlemarch-top-5-most-frequently-quoted-chapters,-line-chart-(color))." + "See link to [Figure 6 context](#Middlemarch-top-5-most-frequently-quoted-chapters,-line-chart) and [data](#Number-of-quotations-per-chapter,-per-decade-(not-normalized-or-weighted)) for more details. [Color version of Figure 6](#Middlemarch-top-5-most-frequently-quoted-chapters,-line-chart-(color))." 
] }, { @@ -20225,7 +20237,7 @@ "\n", "For a complete list of journals classified as \"Victorianist\", see: [Statistics on Victorianist journals in the dataset](#Statistics-on-Victorianist-journals-in-the-dataset).\n", "\n", - "See link to [Figure 7 in notebook](#Victorianist-journals:-Middlemarch-quotations-per-chapter,-per-decade-(normalized-and-weighted),-heat-map) for more context.\n", + "See link to [Figure 7 context](#Victorianist-journals:-Middlemarch-quotations-per-chapter,-per-decade-(normalized-and-weighted),-heat-map) and [data](#Victorianist-journals:-Middlemarch-quotations-per-chapter,-per-decade-(normalized-by-decade-and-weighted-by-word-count)) for more details.\n", "\n" ] },