Notebook to accompany Sierra Eckert and Milan Terlunen's "What We Quote: Disciplinary History and the Textual Atmospheres of Middlemarch," Victorian Studies, vol. 66, no. 4 (Summer 2024).
For a downloadable Jupyter Notebook version of this file, please see: https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/notebooks/jstor-data-and-analysis.ipynb
import pandas as pd
import numpy as np
#import spacy
import re
import json
import altair as alt
#new viz library for single-column heatmap
import matplotlib.pyplot as plt
# import seaborn as sns
#sns.set()
#from nltk.corpus import names
from collections import Counter
from matplotlib import pyplot as plt
%matplotlib inline
plt.rcParams["figure.figsize"] = [16, 6]
plt.style.use('ggplot')
vs_authors_df = pd.read_csv("../data/VS-author-term_frequencies.csv")
vs_authors_df
| | Author | 1960s | 1970s | 1980s | 1990s | 2000s | 2010s |
---|---|---|---|---|---|---|---|
0 | Bronte | 4.238259 | 10.292524 | 11.409396 | 11.659514 | 8.901252 | 0.122175 |
1 | Dickens | 25.429553 | 29.902492 | 30.285235 | 25.894134 | 25.173853 | 25.656689 |
2 | Eliot | 21.534937 | 23.618635 | 29.194631 | 22.031474 | 23.922114 | 22.052535 |
3 | Hardy | 15.349370 | 23.185265 | 20.805369 | 15.021459 | 9.805285 | 11.484423 |
vs_authors_df = vs_authors_df.melt(id_vars=["Author"],
var_name = "Decade",
value_name="Percentage_of_Documents")
vs_authors_df
| | Author | Decade | Percentage_of_Documents |
---|---|---|---|
0 | Bronte | 1960s | 4.238259 |
1 | Dickens | 1960s | 25.429553 |
2 | Eliot | 1960s | 21.534937 |
3 | Hardy | 1960s | 15.349370 |
4 | Bronte | 1970s | 10.292524 |
5 | Dickens | 1970s | 29.902492 |
6 | Eliot | 1970s | 23.618635 |
7 | Hardy | 1970s | 23.185265 |
8 | Bronte | 1980s | 11.409396 |
9 | Dickens | 1980s | 30.285235 |
10 | Eliot | 1980s | 29.194631 |
11 | Hardy | 1980s | 20.805369 |
12 | Bronte | 1990s | 11.659514 |
13 | Dickens | 1990s | 25.894134 |
14 | Eliot | 1990s | 22.031474 |
15 | Hardy | 1990s | 15.021459 |
16 | Bronte | 2000s | 8.901252 |
17 | Dickens | 2000s | 25.173853 |
18 | Eliot | 2000s | 23.922114 |
19 | Hardy | 2000s | 9.805285 |
20 | Bronte | 2010s | 0.122175 |
21 | Dickens | 2010s | 25.656689 |
22 | Eliot | 2010s | 22.052535 |
23 | Hardy | 2010s | 11.484423 |
vs_authors_df['Percentage_of_Documents'] = vs_authors_df['Percentage_of_Documents']* 0.01
line = alt.Chart(vs_authors_df, title="Frequency of author references in *Victorian Studies*").mark_line().encode(
x=alt.X('Decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value')),
y=alt.Y('Percentage_of_Documents:Q', title="Percent of Documents", axis=alt.Axis(labelAngle=0, format="%")),
color=alt.Color('Author:O', scale=alt.Scale(scheme='greys'),legend=None),
)
points = line.mark_point(filled=True).encode(
color=alt.Color('Author:O', scale=alt.Scale(scheme='greys')),
shape=alt.Shape('Author:O', scale=alt.Scale(range=[ 'circle', 'cross', 'square', 'triangle-right', 'diamond'])),
size=alt.Size('Author:O', legend=None, scale=alt.Scale(range=[200,200],domain=['Eliot', 'Dickens', 'Bronte', 'Hardy']))
)
auth_chart = alt.layer(
line,
points
).resolve_scale(
color='independent',
shape='independent'
).properties(width=400).configure_legend(
titleFontSize=11,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
auth_chart#.save('Figure-1.png', ppi=300)
line = alt.Chart(vs_authors_df, title="Frequency of author references in *Victorian Studies*").mark_line().encode(
x=alt.X('Decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value')),
y=alt.Y('Percentage_of_Documents:Q', title="Percent of Documents", axis=alt.Axis(labelAngle=0, format="%")),
color=alt.Color('Author:O', scale=alt.Scale(scheme='category20'),legend=None),
)
points = line.mark_point(filled=True).encode(
color=alt.Color('Author:O', scale=alt.Scale(scheme='category20')),
shape=alt.Shape('Author:O', scale=alt.Scale(range=[ 'circle', 'cross', 'square', 'triangle-right', 'diamond'])),
size=alt.Size('Author:O', legend=None, scale=alt.Scale(range=[200,200],domain=['Eliot', 'Dickens', 'Bronte', 'Hardy']))
)
auth_chart_color = alt.layer(
line,
points
).resolve_scale(
color='independent',
shape='independent'
).properties(width=400).configure_legend(
titleFontSize=11,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
auth_chart_color
vs_titles_df = pd.read_csv("../data/VS-title-term_frequencies.csv")
vs_titles_df
| | Title | 1960s | 1970s | 1980s | 1990s | 2000s | 2010s |
---|---|---|---|---|---|---|---|
0 | Bleak House | 4.238259 | 4.875406 | 4.697987 | 5.007153 | 5.354659 | 4.825901 |
1 | David Copperfield | 3.207331 | 3.575298 | 5.956376 | 4.220315 | 2.781641 | 3.359805 |
2 | Great Expectations | 3.436426 | 3.575298 | 3.104027 | 4.363376 | 3.268428 | 2.871106 |
3 | Middlemarch | 5.841924 | 6.500542 | 7.214765 | 5.865522 | 5.632823 | 6.536347 |
vs_titles_df = vs_titles_df.melt(id_vars=["Title"],
var_name = "Decade",
value_name="Percentage_of_Documents")
vs_titles_df['Percentage_of_Documents'] = vs_titles_df['Percentage_of_Documents']* 0.01
line = alt.Chart(vs_titles_df, title="Frequency of title references in *Victorian Studies*").mark_line().encode(
x=alt.X('Decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value')),
y=alt.Y('Percentage_of_Documents:Q', title="Percent of Documents", axis=alt.Axis(labelAngle=0, format="%")),
color=alt.Color('Title:O', scale=alt.Scale(scheme='greys'),legend=None),
)
points = line.mark_point(filled=True).encode(
color=alt.Color('Title:O', scale=alt.Scale(scheme='greys')),
shape=alt.Shape('Title:O', scale=alt.Scale(range=[ 'circle', 'cross', 'triangle-right', 'square','diamond'])),
size=alt.Size('Title:O', legend=None, scale=alt.Scale(range=[200,200],domain=['Bleak House', 'David Copperfield', 'Middlemarch','Great Expectations', ]))
)
title_chart = alt.layer(
line,
points
).resolve_scale(
color='independent',
shape='independent'
).properties(width=400).configure_legend(
titleFontSize=11,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
title_chart#.save('Figure-2.png', ppi=300)
line = alt.Chart(vs_titles_df, title="Frequency of title references in *Victorian Studies*").mark_line().encode(
x=alt.X('Decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value')),
y=alt.Y('Percentage_of_Documents:Q', title="Percent of Documents", axis=alt.Axis(labelAngle=0, format="%")),
color=alt.Color('Title:O', scale=alt.Scale(scheme='category20'),legend=None),
)
points = line.mark_point(filled=True).encode(
color=alt.Color('Title:O', scale=alt.Scale(scheme='category20')),
shape=alt.Shape('Title:O', scale=alt.Scale(range=[ 'circle', 'cross', 'triangle-right', 'square','diamond'])),
size=alt.Size('Title:O', legend=None, scale=alt.Scale(range=[200,200],domain=['Bleak House', 'David Copperfield', 'Middlemarch','Great Expectations', ]))
)
title_chart_color = alt.layer(
line,
points
).resolve_scale(
color='independent',
shape='independent'
).properties(width=400).configure_legend(
titleFontSize=11,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
title_chart_color
Here, we're using the Project Gutenberg text of Middlemarch, with one modification: the phrase "Book 1" has been moved to appear before the prelude, marking that the "Prelude" is indeed part of the text that appeared with Book 1.
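As a quick sanity check on that modification, here is a minimal sketch (using the same file path and the same "BOOK" and "PRELUDE" markers as the cells below) that prints the position of each marker; after the modification the first "BOOK" marker is expected to precede the "PRELUDE" heading.
# Sanity check (assumes the ../middlemarch.txt path used below):
# the first "BOOK" marker should now come just before the "PRELUDE" heading.
with open('../middlemarch.txt') as f:
    text = f.read()
print(text.find('\nBOOK'), text.find('PRELUDE'))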
with open('../middlemarch.txt') as f:
mm = f.read()
textALength = len(mm)
# Get chapter locations
chapterMatches = re.finditer('PRELUDE|CHAPTER|FINALE', mm)
chapterLocations = [match.start() for match in chapterMatches]
chapterLocations.append(textALength) # Append the end of the text to bound the final chunk.
len(chapterLocations)
89
# Get paragraph locations
paragraphMatches = re.finditer('\n\n', mm)
paragraphLocations = [match.start() for match in paragraphMatches]
paragraphLocations.append(textALength)
len(paragraphLocations)
4890
# Get book locations
bookLocations = [match.start() for match in re.finditer('\nBOOK', mm)]
bookLocations = [0] + bookLocations + [textALength] # Add the start and end of the text to bound the first and last chunks.
bookLocations
[0, 38, 250307, 481579, 681858, 915901, 1138247, 1364956, 1571148, 1793449]
def getChapters(text):
chapters = []
for i, loc in enumerate(chapterLocations):
if i != len(chapterLocations)-1:
            chapter = text[loc:chapterLocations[i+1]]
chapters.append(chapter)
return chapters
chapters = getChapters(mm)
chapterLengths = [len(chapter.split()) for chapter in chapters]
chapterLengthsSeries = pd.Series(chapterLengths)
chapterLengthsSeries.plot(kind='bar', title='Middlemarch Chapter Lengths')
<Axes: title={'center': 'Middlemarch Chapter Lengths'}>
def getParagraphs(text):
paragraphs = []
for i, loc in enumerate(paragraphLocations):
if i != len(paragraphLocations)-1:
            paragraph = text[loc:paragraphLocations[i+1]]
            paragraphs.append(paragraph)
    return paragraphs
paragraphs = getParagraphs(mm)
paragraphLengths = [len(paragraph.split()) for paragraph in paragraphs]
paragraphLengthsSeries = pd.Series(paragraphLengths)
paragraphs[-1]  # display the final paragraph of the novel
'\n\nHer finely touched spirit had still its fine issues, though they were\nnot widely visible. Her full nature, like that river of which Cyrus\nbroke the strength, spent itself in channels which had no great name on\nthe earth. But the effect of her being on those around her was\nincalculably diffusive: for the growing good of the world is partly\ndependent on unhistoric acts; and that things are not so ill with you\nand me as they might have been, is half owing to the number who lived\nfaithfully a hidden life, and rest in unvisited tombs.\n'
text-matcher JSTOR data
Here, we're reading in the output of our text-matcher run on our JSTOR data (in JSON format).
df = pd.read_json('../data/t2-c3-n2-m3-no-stops.json')
df
| | creator | datePublished | docSubType | docType | id | identifier | isPartOf | issueNumber | language | outputFormat | ... | title | url | volumeNumber | wordCount | numMatches | Locations in A | Locations in B | abstract | keyphrase | subTitle |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | [Rainer Emig] | 2006-01-01 | book-review | article | http://www.jstor.org/stable/41158244 | [{'name': 'issn', 'value': '03402827'}, {'name... | Amerikastudien / American Studies | 3 | [eng] | [unigram, bigram, trigram] | ... | Review Article | http://www.jstor.org/stable/41158244 | 51 | 1109 | 1 | [[130022, 130046]] | [[6851, 6875]] | None | None | None |
1 | [Martin Green] | 1970-01-01 | book-review | article | http://www.jstor.org/stable/3722819 | [{'name': 'issn', 'value': '00267937'}, {'name... | The Modern Language Review | 1 | [eng] | [unigram, bigram, trigram] | ... | Review Article | http://www.jstor.org/stable/3722819 | 65 | 1342 | 0 | [] | [] | None | None | None |
2 | [Richard Exner] | 1982-01-01 | book-review | article | http://www.jstor.org/stable/40137021 | [{'name': 'issn', 'value': '01963570'}, {'name... | World Literature Today | 1 | [eng] | [unigram, bigram, trigram] | ... | Review Article | http://www.jstor.org/stable/40137021 | 56 | 493 | 0 | [] | [] | None | None | None |
3 | [Ruth Evelyn Henderson] | 1925-10-01 | research-article | article | http://www.jstor.org/stable/802346 | [{'name': 'issn', 'value': '00138274'}, {'name... | The English Journal | 8 | [eng] | [unigram, bigram, trigram, fullText] | ... | American Education Week--November 16-22; Some ... | http://www.jstor.org/stable/802346 | 14 | 2161 | 0 | [] | [] | None | None | None |
4 | [Alan Palmer] | 2011-12-01 | research-article | article | http://www.jstor.org/stable/10.5325/style.45.4... | [{'name': 'issn', 'value': '00394238'}, {'name... | Style | 4 | [eng] | [unigram, bigram, trigram] | ... | Rejoinder to Response by Marie-Laure Ryan | http://www.jstor.org/stable/10.5325/style.45.4... | 45 | 1127 | 0 | [] | [] | None | None | None |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5879 | [Michaela Giesenkirchen] | 2005-10-01 | research-article | article | http://www.jstor.org/stable/27747183 | [{'name': 'issn', 'value': '15403084'}, {'name... | American Literary Realism | 1 | [eng] | [unigram, bigram, trigram] | ... | Ethnic Types and Problems of Characterization ... | http://www.jstor.org/stable/27747183 | 38 | 7349 | 1 | [[23799, 24121]] | [[41472, 41793]] | None | None | None |
5880 | [Leon Botstein] | 2005-07-01 | misc | article | http://www.jstor.org/stable/4123220 | [{'name': 'issn', 'value': '00274631'}, {'name... | The Musical Quarterly | 2 | [eng] | [unigram, bigram, trigram] | ... | On the Power of Music | http://www.jstor.org/stable/4123220 | 88 | 1525 | 0 | [] | [] | None | None | None |
5881 | [Linda M. Shires] | 2013-01-01 | research-article | article | http://www.jstor.org/stable/24575734 | [{'name': 'issn', 'value': '10601503'}, {'name... | Victorian Literature and Culture | 4 | [eng] | [unigram, bigram, trigram] | ... | HARDY'S MEMORIAL ART: IMAGE AND TEXT IN "WESSE... | http://www.jstor.org/stable/24575734 | 41 | 10736 | 1 | [[173657, 173756]] | [[33963, 34061]] | None | None | None |
5882 | [Edward H. Cohen] | 1990-07-01 | misc | article | http://www.jstor.org/stable/3827815 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 4 | [eng] | [unigram, bigram, trigram] | ... | Victorian Bibliography for 1989 | http://www.jstor.org/stable/3827815 | 33 | 81819 | 0 | [] | [] | None | None | None |
5883 | None | 1964-06-01 | misc | article | http://www.jstor.org/stable/2932781 | [{'name': 'issn', 'value': '00290564'}, {'name... | Nineteenth-Century Fiction | 1 | [eng] | [unigram, bigram, trigram] | ... | Volume Information | http://www.jstor.org/stable/2932781 | 19 | 694 | 0 | [] | [] | None | None | None |
5884 rows × 29 columns
df.to_csv('../data/matches.csv', encoding='utf-8')
# New for JSTOR 2022 data
df['year'] = pd.DatetimeIndex(df['datePublished']).year
# New for JSTOR 2022 data
df['year']
0       2006
1       1970
2       1982
3       1925
4       2011
        ...
5879    2005
5880    2005
5881    2013
5882    1990
5883    1964
Name: year, Length: 5884, dtype: int64
df['Decade'] = df['year'] - (df['year'] % 10)
# df['Locations in A'] = df['matches'].apply(lambda x: x[1])
# df['NumMatches'] = df['matches'].apply(lambda x: x[0])
text-matcher dataset
Our text-matcher dataset includes the following fields (a short illustrative sketch follows the table):
Data field | Definition |
---|---|
'abstract' | abstract for piece (if present), supplied by JSTOR |
'creator' | author name supplied by JSTOR |
'datePublished' | date on cover, supplied by JSTOR |
'Decade' | decade of publication, generated by Middlematch team from 'year' |
'docSubType' | item genre (includes "research-article", "book-review", "review-article", "review-essay", "books-received", "discussion", "editorial", "news", "misc", "other"), supplied by JSTOR |
'docType' | item type (either "article" or "chapter"), supplied by JSTOR |
'id' | text (URL), supplied by JSTOR |
'identifier' | identifier metadata, supplied by JSTOR |
'isPartOf' | journal or book title, supplied by JSTOR |
'issueNumber' | issue or number, supplied by JSTOR |
'journal' | journal title, generated by Middlematch team from 'isPartOf' |
'keyphrase' | keywords for piece (if present), supplied by JSTOR |
'language' | language, supplied by JSTOR |
'Locations in A' | character indices for the start and end of each quoted passage in the source text (A) [Middlemarch], generated by text-matcher |
'Locations in A with Wordcounts' | list of pairs giving the character indices for the start and end of each quotation in the source text (A) [Middlemarch], together with the wordcount of that quotation, generated by Middlematch team from 'Locations in A' |
'Locations in B' | character indices for the start and end of each quoted passage in the target text (B) [a given JSTOR text], generated by text-matcher |
'numMatches' | number of matches, generated by text-matcher |
'outputFormat' | unigrams, bigrams, trigrams, or fullText, supplied by JSTOR |
'pageCount' | number of pages in piece, supplied by JSTOR |
'pageEnd' | ending page number, supplied by JSTOR |
'pageStart' | starting page number, supplied by JSTOR |
'pagination' | page numbers, supplied by JSTOR |
'provider' | repository provider name, supplied by JSTOR |
'publicationYear' | year of publication, supplied by JSTOR |
'publisher' | name of publisher, supplied by JSTOR |
'Quoted Words' | total number of words in all quotations in that text, generated by Middlematch team from 'Locations in A' |
'sourceCategory' | tags for subject heading, supplied by JSTOR |
'subTitle' | subtitle of article or piece of writing, supplied by JSTOR |
'tdmCategory' | Library of Congress subject heading tags for text data mining, supplied by JSTOR |
'title' | title of article or piece of writing, supplied by JSTOR |
'url' | URL version of source citation, supplied by JSTOR |
'volumeNumber' | journal volume, supplied by JSTOR |
'wordCount' | total wordcount of piece, supplied by JSTOR |
'Wordcounts' | list of wordcounts for each matched quotation, generated by Middlematch team from 'Locations in A with Wordcounts' |
'year' | year of publication, generated by Middlematch team from 'datePublished' |
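For orientation, here is a minimal illustrative sketch (not part of the original analysis) that previews a few of these fields for the items text-matcher flagged, using column names from the table above; the exact rows shown depend on the dataset.
# Illustrative only: peek at a handful of JSTOR and text-matcher fields,
# sorted by the number of detected matches.
cols = ['creator', 'title', 'isPartOf', 'publicationYear', 'numMatches', 'wordCount']
df[df['numMatches'] > 0][cols].sort_values('numMatches', ascending=False).head(10)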
df.columns
Index(['creator', 'datePublished', 'docSubType', 'docType', 'id', 'identifier', 'isPartOf', 'issueNumber', 'language', 'outputFormat', 'pageCount', 'pageEnd', 'pageStart', 'pagination', 'provider', 'publicationYear', 'publisher', 'sourceCategory', 'tdmCategory', 'title', 'url', 'volumeNumber', 'wordCount', 'numMatches', 'Locations in A', 'Locations in B', 'abstract', 'keyphrase', 'subTitle', 'year', 'Decade'], dtype='object')
def diachronicAnalysis(df, decades=(1950, 2020), bins=chapterLocations, useWordcounts=True, normalize=True):
""" Turning on useWordcounts makes it so that it's weighted by wordcount.
Turning it off uses raw numbers of quotations. """
decades = np.arange(decades[0], decades[1], 10)
# Make a dictionary of decades.
# Values are a list of locations.
decadeDict = {}
for i, row in df.iterrows():
decade = row['Decade']
locationsAndWordcounts = row['Locations in A with Wordcounts']
if decade not in decadeDict:
decadeDict[decade] = locationsAndWordcounts.copy()
else:
decadeDict[decade] += locationsAndWordcounts.copy()
# Grab the beginnings of quotes.
decadeStartsWeights = {decade: [(item[0][0], item[1])
for item in loc]
for decade, loc in decadeDict.items()}
if useWordcounts:
decadesBinned = {decade:
np.histogram([loc[0] for loc in locations],
bins=bins,
weights=[loc[1] for loc in locations],
range=(0, textALength))[0]
for decade, locations in decadeStartsWeights.items()
if decade in decades}
else:
decadesBinned = {decade:
np.histogram([loc[0] for loc in locations],
bins=bins,
range=(0, textALength))[0]
for decade, locations in decadeStartsWeights.items()
if decade in decades}
decadesDF = pd.DataFrame(decadesBinned).T
#Normalize
if normalize:
decadesDF = decadesDF.div(decadesDF.max(axis=1), axis=0)
return decadesDF
def countWords(locRange):
""" Counts words in middlemarch, given character ranges. """
chunk = mm[locRange[0]:locRange[1]]
return len(chunk.split())
def totalWords(locRangeSet):
""" Counts total words in a list of location ranges. """
return sum([countWords(locRange) for locRange in locRangeSet])
def countsPerSet(locRangeSet):
""" Returns an augmented location range set that includes word counts. """
return [(locRange, countWords(locRange))
for locRange in locRangeSet]
def extractWordcounts(locsAndWordcounts):
"""
Takes pairs of location ranges and wordcounts,
and returns just the wordcounts.
"""
    return [item[1] for item in locsAndWordcounts]
def synchronicAnalysis(df, bins=chapterLocations, useWordcounts=True):
locs = df['Locations in A'].values
locCounts = [(loc, countWords(loc)) for locSet in locs
for loc in locSet]
starts = [loc[0][0] for loc in locCounts]
counts = [loc[1] for loc in locCounts]
if useWordcounts:
binned = np.histogram(starts, bins=bins,
weights=counts, range=(0, textALength))
else:
binned = np.histogram(starts, bins=bins,
range=(0, textALength))
binnedDF = pd.Series(binned[0])
return binnedDF
def plotDiachronicAnalysis(df, save=False, reverse=False):
ylabels = [str(int(decade)) for decade in df.index] + ['2020']
plt.pcolor(df, cmap='gnuplot')
plt.yticks(np.arange(len(df.index)+1), ylabels)
plt.gca().invert_yaxis()
plt.ylabel('Decade')
plt.xlabel('Chapter')
plt.gca().set_xlim((0, len(df.T)))
plt.colorbar(ticks=[])
if save:
        plt.savefig('diachronic.png', bbox_inches='tight', dpi=300, transparent=True)
plt.show()
def plotSynchronicAnalysis(s, useWordcounts=True):
ax = s.plot(kind='bar')
ax.set_xlabel('Chapter')
if useWordcounts:
ax.set_ylabel('Number of Words Quoted')
else:
ax.set_ylabel('Number of Quotations')
def plotSynchronicAnalysisHeatmap(s, useWordcounts=True):
    """ Plots the per-chapter series s as a single-row heatmap. Requires seaborn (sns). """
    fig, ax = plt.subplots()
    sns.heatmap([s.values], ax=ax, cmap='magma')
    ax.set_xlabel('Chapter')
    ax.set_ylabel('Number of Words Quoted' if useWordcounts else 'Number of Quotations')
def plotDiachronicAnalysisBubble(df, save=False, reverse=False):
    """ Sketch of a bubble-chart alternative; expects a long-format dataframe
    with 'Chapter', 'Decade', and 'count' columns. """
    return alt.Chart(df).mark_circle().encode(
        x='Chapter',
        y='Decade',
        size='sum(count):Q'
    )
df['Quoted Words'] = df['Locations in A'].apply(totalWords)
df['Locations in A with Wordcounts'] = df['Locations in A'].apply(countsPerSet)
df['Wordcounts'] = df['Locations in A with Wordcounts'].apply(extractWordcounts)
# Compare the total diachronic wordcount with the total synchronic wordcount.
# They differ slightly because the diachronic analysis only counts items whose decade falls in the given range, so the handful of post-2019 items are excluded.
decadeSums = diachronicAnalysis(df, decades=(1700, 2020), useWordcounts=True, normalize=False).sum(axis=1)
decadeSums.sum()
119747
chapterSums = synchronicAnalysis(df)
chapterSums.sum()
119892
sum([len(item) for item in df['Locations in A'].values])
3800
allMatches = []
for group in df['Locations in A'].values:
for pair in group:
allMatches.append(pair)
len(allMatches)
3800
print("Total articles with 'Middlemarch' appearing somewhere in text or metadata:")
len(df) # Total articles with "Middlemarch" mentioned somewhere
Total articles with 'Middlemarch' appearing somewhere in text or metadata:
5884
Find only those with non-trivial quotations from Middlemarch:
articlesWithMatches = df[df['Locations in A'].apply(lambda x: len(x) > 0)]
articlesWithMatches.year.describe()
count    1540.000000
mean     1991.488961
std        19.713886
min      1900.000000
25%      1980.000000
50%      1994.000000
75%      2007.000000
max      2022.000000
Name: year, dtype: float64
print("Number of articles with matches to text in 'Middlemarch':")
articlesWithMatches['Locations in A'].count()
Number of articles with matches to text in 'Middlemarch':
1540
articlesWithMatches['isPartOf'].value_counts()[:300]
articlesWithMatches['docType'].value_counts()
article    1500
chapter      40
Name: docType, dtype: int64
articlesWithMatches.Wordcounts.apply(len).head()
0      1
9      1
17    16
19     3
21     7
Name: Wordcounts, dtype: int64
# articlesWithMatches.to_json('../data/cleaned-matches.json')
Here, we're looking just at the 1,540 items with matches (1,500 journal articles and 40 book chapters)
# New for JSTOR 2022 dataset, because we have more data
alt.data_transformers.disable_max_rows()
DataTransformerRegistry.enable('default')
alt.Chart(articlesWithMatches, title="Number of JSTOR articles with detected Middlemarch matches, by year").mark_bar().encode(x='year:O', y='count()').properties(width=1000)
One reason the 6,069-article original dataset is so much larger has to do with the way JSTOR ingests paratextual matter from journals. We found appearances of the word "middlemarch" in paratextual matter, which is systematically titled in JSTOR. Here we define a quick function to count the number of article-like paratextual items: "front matter", "back matter", "table of contents" and "cover".
None of these titles are present in the smaller dataset of matches.
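As a quick check of that claim, here is a minimal sketch (assuming 'title' is a plain string field, as in the previews above) that searches for these paratextual titles among the items with matches:
# Sketch: look for paratextual titles among the items with matches.
# Expected to find none, per the claim above.
paratext = ['front matter', 'back matter', 'table of contents', 'cover']
has_paratext_title = articlesWithMatches['title'].fillna('').str.lower()\
    .apply(lambda t: any(p in t for p in paratext))
has_paratext_title.sum()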
# Try to find out what articles contain no Middlemarch citations
articlesWithoutMatches = df[df['Locations in A'].apply(lambda x: len(x) == 0)]
n = 10
articlesWithoutMatches['title'].value_counts()[:n].index.tolist()
['Review Article', 'Front Matter', 'Back Matter', 'Volume Information', 'Summary of Periodical Literature', 'Index', 'Recent Studies in the Nineteenth Century', 'Books Received', 'List of Publications Received', 'INDEX']
# What is the most frequent name of articles with no citations?
articlesWithoutMatches['title'].describe()
count               4344
unique              2304
top       Review Article
freq                1199
Name: title, dtype: object
def isGarbage(itemTitle):
badTitles = ['front matter', 'back matter', 'table of contents', 'cover']
if itemTitle == None:
return False
for title in itemTitle:
for badTitle in badTitles:
if badTitle in title.lower():
return True
return False
print("Number of'front matter', 'back matter', 'table of contents', 'cover' items in the 6069-article JSTOR dataset:")
len(df[df.title.apply(isGarbage)]) # How many garbage items?
Number of 'front matter', 'back matter', 'table of contents', 'cover' items in the 6069-article JSTOR dataset:
0
wordcounts = []
for countSet in df['Wordcounts'].values:
for count in countSet:
wordcounts.append(count)
pd.Series(wordcounts).hist()
<Axes: >
df['Quoted Words'].describe()
count    5884.000000
mean       20.375935
std        94.477822
min         0.000000
25%         0.000000
50%         0.000000
75%         4.000000
max      2138.000000
Name: Quoted Words, dtype: float64
print('Statistics on average, min, and max number of quoted words per item:')
articlesWithMatches['Quoted Words'].describe()
Statistics on average, min, and max number of quoted words per item:
count    1540.000000
mean       77.851948
std       172.172395
min         2.000000
25%         6.000000
50%        17.000000
75%        64.000000
max      2138.000000
Name: Quoted Words, dtype: float64
len(df[df['Quoted Words'] > 0])
1540
articlesWithMatches['Quoted Words'].hist()
<Axes: >
Average number of words per match, per item:
print('Average number of words per match, per item:')
articlesWithMatches['Wordcounts'].apply(np.mean).head()
Average number of words per match, per item:
0      4.000000
9     23.000000
17    21.812500
19    22.333333
21    60.000000
Name: Wordcounts, dtype: float64
print('Statistics on average number of words, min/max per match, per item:')
articlesWithMatches['Wordcounts'].apply(np.mean).describe()
Statistics on average number of words, min/max per match, per item:
count    1540.000000
mean       24.711784
std        29.718071
min         2.000000
25%         6.000000
50%        15.000000
75%        32.000000
max       371.250000
Name: Wordcounts, dtype: float64
In these next sections, we're going to look at the "Book" and chapter level quotation counts for the entire dataset of text-matcher data.
wordsQuotedPerBook = synchronicAnalysis(df, bins=bookLocations, useWordcounts=True)
wordsQuotedPerBook
0        0
1    28112
2    32378
3    11351
4    10677
5     6938
6     6045
7     3882
8    20509
dtype: int64
wordsQuotedPerBook = pd.DataFrame(wordsQuotedPerBook, index=range(1,9), columns=['Number of Words Quoted'])
wordsQuotedPerBook['Book'] = range(1, 9)
wordsQuotedPerBook
| | Number of Words Quoted | Book |
---|---|---|
1 | 28112 | 1 |
2 | 32378 | 2 |
3 | 11351 | 3 |
4 | 10677 | 4 |
5 | 6938 | 5 |
6 | 6045 | 6 |
7 | 3882 | 7 |
8 | 20509 | 8 |
alt.Chart(wordsQuotedPerBook, title="Number of Words Quoted, per Book in Middlemarch").\
mark_bar().encode(x=alt.X('Book:O', axis=alt.Axis(title="Book", labelAngle=0)), y='Number of Words Quoted:Q').\
properties(width=500)
quotationsPerBook = synchronicAnalysis(df, bins=bookLocations, useWordcounts=False)
quotationsPerBook
0       0
1    1036
2     880
3     334
4     311
5     251
6     224
7     142
8     622
dtype: int64
quotationsPerBook = pd.DataFrame(quotationsPerBook, index=range(1,9), columns=['Number of Quotations'])
quotationsPerBook['Book'] = range(1, 9)
quotationsPerBook
| | Number of Quotations | Book |
---|---|---|
1 | 1036 | 1 |
2 | 880 | 2 |
3 | 334 | 3 |
4 | 311 | 4 |
5 | 251 | 5 |
6 | 224 | 6 |
7 | 142 | 7 |
8 | 622 | 8 |
quotationsPerBook['Number of Quotations'].sum()
3800
alt.Chart(quotationsPerBook, title="Number of Quotations, per Book in Middlemarch").\
mark_bar().encode(x=alt.X('Book:O', axis=alt.Axis(title="Book", labelAngle=0)), y='Number of Quotations:Q').\
properties(width=500)
synchronicAnalysis(df, useWordcounts=True).to_csv('../papers/spring2017-middlemarch-paper/data/num-words-quoted-per-chapter.csv')
wordsQuotedPerChapter = synchronicAnalysis(df, bins=chapterLocations, useWordcounts=True)
wordsQuotedPerChapter = pd.DataFrame(wordsQuotedPerChapter, index=range(0,88), columns=['Number of Words Quoted'])
wordsQuotedPerChapter['Chapter'] = range(0, 88)
wordsQuotedPerChapter
| | Number of Words Quoted | Chapter |
---|---|---|
0 | 3919 | 0 |
1 | 6284 | 1 |
2 | 2412 | 2 |
3 | 2915 | 3 |
4 | 513 | 4 |
... | ... | ... |
83 | 1000 | 83 |
84 | 180 | 84 |
85 | 1485 | 85 |
86 | 184 | 86 |
87 | 5219 | 87 |
88 rows × 2 columns
alt.Chart(wordsQuotedPerChapter, title="Number of Words Quoted, per Chapter in Middlemarch").\
mark_bar().encode(x=alt.X('Chapter:O', axis=alt.Axis(title="Chapter", labelAngle=0, values=list(range(0, 87, 5)))), y='Number of Words Quoted:Q').\
properties(width=800).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
labelFontSize=14,
titleFontSize=14)
quotationsPerChapter = synchronicAnalysis(df, bins=chapterLocations, useWordcounts=False)
quotationsPerChapter = pd.DataFrame(quotationsPerChapter, index=range(0,88), columns=['Number of Quotations'])
quotationsPerChapter['Chapter'] = range(0, 88)
quotationsPerChapter
| | Number of Quotations | Chapter |
---|---|---|
0 | 159 | 0 |
1 | 197 | 1 |
2 | 89 | 2 |
3 | 114 | 3 |
4 | 40 | 4 |
... | ... | ... |
83 | 40 | 83 |
84 | 10 | 84 |
85 | 4 | 85 |
86 | 29 | 86 |
87 | 187 | 87 |
88 rows × 2 columns
quotes_per_chap = alt.Chart(quotationsPerChapter, title="Number of Quotations, per Chapter in Middlemarch").\
mark_bar().encode(x=alt.X('Chapter:O', axis=alt.Axis(title="Chapter", labelAngle=0, values=list(range(0, 87, 5)))), y='Number of Quotations:Q').\
properties(width=800).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
labelFontSize=14,
titleFontSize=14)
quotes_per_chap
#!pip install vl-convert-python
alt.Chart(quotationsPerChapter).\
mark_bar().encode(x=alt.X('Chapter:O', axis=alt.Axis(title="Chapter", labelAngle=0, values=list(range(0, 87, 5)))), y='Number of Quotations:Q').\
properties(width=800).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
labelFontSize=14,
titleFontSize=14).save('Figure-3.png', ppi=300)
ranked_freq_chap = alt.Chart(quotationsPerChapter, title="Number of Quotations, per Chapter in Middlemarch").\
mark_bar().encode(x=alt.X('Chapter:O', sort='-y', axis=alt.Axis(labelExpr='"Chap." + datum.value', values=())), y=alt.Y('Number of Quotations:Q', axis=alt.Axis(labelAngle=0))).\
properties(width=800).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
labelFontSize=14,
titleFontSize=14
)
ranked_freq_chap#.save('Figure-4.png', ppi=300)
alt.Chart(quotationsPerChapter, title="Number of Quotations, per Chapter in Middlemarch").\
mark_bar().encode(x=alt.X('Number of Quotations:Q'), y=alt.Y('Chapter:O', sort='-x', axis=alt.Axis(title="Chapters, by frequency quoted"))).\
properties().configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
labelFontSize=14,
titleFontSize=14
)
alt.Chart(quotationsPerChapter, title="Number of Quotations, per Chapter in Middlemarch").\
mark_bar().encode(x=alt.X('Chapter:O', sort='-y', axis=alt.Axis(labelExpr='"Chap." + datum.value', values=(20, 15, 1, 87, 10, 2, 0, 3,19, 81))), y=alt.Y('Number of Quotations:Q', axis=alt.Axis(labelAngle=-90))).\
properties(width=800).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
labelFontSize=12,
titleFontSize=14
)
#alt.Chart(quotationsPerChapter).\
#mark_bar().encode(x=alt.X('Chapter:O', sort='-y', axis=alt.Axis(labelExpr='"Chap." + datum.value', values=())), y=alt.Y('Number of Quotations:Q', axis=alt.Axis(labelAngle=0))).\
#properties(width=800).configure_legend(
#titleFontSize=14,
#labelFontSize=14
#).configure_axis(
# labelFontSize=14,
# titleFontSize=14
#).save('Figure-4.png', ppi=300)
#alt.Chart(quotationsPerChapter, title="Number of Quotations, per Chapter in Middlemarch").\
#mark_bar().encode(x=alt.X('Chapter:O', sort='-y', axis=alt.Axis(labelExpr='"Chap." + datum.value', values=())), y=alt.Y('Number of Quotations:Q', axis=alt.Axis(labelAngle=-90))).\
#properties(width=900).configure_legend(
#titleFontSize=14,
#labelFontSize=14
#).configure_axis(
# labelFontSize=12,
# titleFontSize=14
#)
quotationsPerParagraph = synchronicAnalysis(df, bins=paragraphLocations, useWordcounts=False)
quotationsPerParagraph = pd.DataFrame(quotationsPerParagraph, index=range(0,4889), columns=['Number of Quotations'])
quotationsPerParagraph['Paragraph'] = range(0, 4889)
quotationsPerParagraph
| | Number of Quotations | Paragraph |
---|---|---|
0 | 0 | 0 |
1 | 0 | 1 |
2 | 0 | 2 |
3 | 0 | 3 |
4 | 0 | 4 |
... | ... | ... |
4884 | 2 | 4884 |
4885 | 2 | 4885 |
4886 | 2 | 4886 |
4887 | 31 | 4887 |
4888 | 60 | 4888 |
4889 rows × 2 columns
# Let's remove the paragraphs with zero quotations
nonzeroquotationsPerParagraph = quotationsPerParagraph[quotationsPerParagraph['Number of Quotations'] != 0]
nonzeroquotationsPerParagraph.groupby('Number of Quotations').count()
Number of Quotations | Paragraph |
---|---|
1 | 548 |
2 | 199 |
3 | 111 |
4 | 43 |
5 | 39 |
6 | 21 |
7 | 16 |
8 | 13 |
9 | 19 |
10 | 6 |
11 | 7 |
12 | 3 |
13 | 10 |
14 | 6 |
15 | 4 |
16 | 3 |
18 | 1 |
19 | 2 |
20 | 5 |
21 | 1 |
23 | 2 |
24 | 1 |
25 | 3 |
26 | 1 |
28 | 1 |
31 | 2 |
32 | 1 |
33 | 1 |
34 | 2 |
41 | 2 |
42 | 2 |
45 | 1 |
47 | 1 |
54 | 1 |
60 | 1 |
63 | 1 |
69 | 1 |
71 | 1 |
#ax = nonzeroquotationsPerParagraph['Number of Quotations'].sort_values(ascending=False).plot(kind='bar',\ title="Number of Middlemarch Quotations, by Paragraph, Sorted by Frequency", figsize=(40,10))
#ax.set_xlabel('Paragraph')
#ax.set_ylabel('Number of Quotations')
# Get the raw number of quotations per chapter
# synchronicAnalysis(df, useWordcounts=False).to_csv('../papers/spring2017-middlemarch-paper/data/num-quotations-per-chapter.csv')
# Adjusted for the number of words in each chapter
ax = (synchronicAnalysis(df) / chapterLengthsSeries).plot(kind='bar')
ax.set_xlabel('Chapter')
ax.set_ylabel('Words Quoted, Normalized')
Text(0, 0.5, 'Words Quoted, Normalized')
For the diachronic analysis, we examine book- and chapter-level data for quotations from Middlemarch.
We use three different methods to analyze quotations per book, by decade: raw counts of quotations; counts normalized within each decade; and counts weighted by quotation length and then normalized. Because our corpus contains varying numbers of JSTOR texts per decade, weighting by quotation length and normalizing within each decade makes the decades easier to compare.
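Concretely, the three variants correspond to three calls of diachronicAnalysis; here is a compact sketch of the per-book case (the cells below walk through each variant in turn):
# The three variants used below, shown for per-book bins:
raw_counts = diachronicAnalysis(df, decades=(1960, 2020), bins=bookLocations,
                                useWordcounts=False, normalize=False)  # raw counts
normalized = diachronicAnalysis(df, decades=(1960, 2020), bins=bookLocations,
                                useWordcounts=False, normalize=True)   # scaled to each decade's maximum
weighted = diachronicAnalysis(df, decades=(1960, 2020), bins=bookLocations,
                              useWordcounts=True, normalize=True)      # weighted by quotation length, then scaled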
# Raw quotation counts (not weighted by the number of words in the quotation or normalized by decade)
# Turning on useWordcounts makes it so that it's weighted by wordcount. Turning it off uses raw numbers of quotations.
booksNotNormalizedNotWeightedDiaDF = diachronicAnalysis(df, decades=(1960, 2020), bins=bookLocations,\
useWordcounts=False, normalize=False).sort_index()
print('Number of quotations per book, per decade')
booksNotNormalizedNotWeightedDiaDF
Number of quotations per book, per decade
Decade | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
---|---|---|---|---|---|---|---|---|---|
1960 | 0 | 55 | 49 | 17 | 23 | 23 | 9 | 12 | 54 |
1970 | 0 | 151 | 97 | 40 | 36 | 33 | 28 | 14 | 69 |
1980 | 0 | 183 | 145 | 50 | 47 | 29 | 44 | 22 | 117 |
1990 | 0 | 234 | 214 | 69 | 85 | 65 | 39 | 30 | 134 |
2000 | 0 | 196 | 170 | 60 | 58 | 47 | 50 | 23 | 80 |
2010 | 0 | 177 | 163 | 86 | 54 | 40 | 46 | 33 | 111 |
booksNotNormalizedNotWeightedDiaDF['decade'] = booksNotNormalizedNotWeightedDiaDF.index
booksNotNormalizedNotWeightedDiaDFMelted = booksNotNormalizedNotWeightedDiaDF.melt(id_vars='decade', var_name='book')
# cut out erroneous "book 0" material (i.e. title page)
booksNotNormalizedNotWeightedDiaDFMelted = booksNotNormalizedNotWeightedDiaDFMelted[booksNotNormalizedNotWeightedDiaDFMelted.book != 0]
booksNotNormalizedNotWeightedDiaDFMeltedExport = booksNotNormalizedNotWeightedDiaDFMelted.rename(columns={"value": "Number of Quotations"})
# To export a CSV, uncomment the line below
# booksNotNormalizedNotWeightedDiaDFMeltedExport.to_csv("Middlemarch-quotations-per-book-per-decade-not-weighted-or-normalized.csv")
alt.Chart(booksNotNormalizedNotWeightedDiaDFMelted,\
title="Middlemarch quotations per book, per decade (not weighted or normalized)")\
.mark_rect().encode(x=alt.X('book', type='ordinal',
axis=alt.Axis(labelAngle=0)),
y=alt.Y('decade', type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Number of Quotations"))).properties(width=500, height=300).configure_axis(
labelFontSize=14,
titleFontSize=14
).configure_legend(
titleFontSize=14,
labelFontSize=14
)
alt.Chart(booksNotNormalizedNotWeightedDiaDFMelted,\
title="Middlemarch quotations per book, per decade (not weighted or normalized)")\
.mark_rect().encode(x=alt.X('decade', type='ordinal',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value + "s"')),
y=alt.Y('book', type='ordinal', sort='descending'),
color=alt.Color('value', legend=alt.Legend(title="Number of Quotations"))).properties(width=500, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
labelFontSize=14,
titleFontSize=14
)
# Number of quotations (not weighted), normalized by decade (counts are scaled to the maximum value per decade)
booksNotWeightedDiaDF = diachronicAnalysis(df, decades=(1960, 2020), bins=bookLocations, useWordcounts=False, normalize=True).sort_index()
print('Quotations per book, per decade (normalized by decade):')
booksNotWeightedDiaDF
Quotations per book, per decade (normalized by decade):
Decade | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
---|---|---|---|---|---|---|---|---|---|
1960 | 0.0 | 1.0 | 0.890909 | 0.309091 | 0.418182 | 0.418182 | 0.163636 | 0.218182 | 0.981818 |
1970 | 0.0 | 1.0 | 0.642384 | 0.264901 | 0.238411 | 0.218543 | 0.185430 | 0.092715 | 0.456954 |
1980 | 0.0 | 1.0 | 0.792350 | 0.273224 | 0.256831 | 0.158470 | 0.240437 | 0.120219 | 0.639344 |
1990 | 0.0 | 1.0 | 0.914530 | 0.294872 | 0.363248 | 0.277778 | 0.166667 | 0.128205 | 0.572650 |
2000 | 0.0 | 1.0 | 0.867347 | 0.306122 | 0.295918 | 0.239796 | 0.255102 | 0.117347 | 0.408163 |
2010 | 0.0 | 1.0 | 0.920904 | 0.485876 | 0.305085 | 0.225989 | 0.259887 | 0.186441 | 0.627119 |
# Weighted by wordcount (by the number of words in the quotation) and normalized by decade (counts are scaled to the maximum value per decade)
booksDiaDF = diachronicAnalysis(df, decades=(1960, 2020), bins=bookLocations, useWordcounts=True, normalize=True).sort_index()
print('Quotations per book, per decade (weighted by length of quotation and normalized by decade):')
booksDiaDF
Quotations per book, per decade (weighted by length of quotation and normalized by decade):
Decade | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
---|---|---|---|---|---|---|---|---|---|
1960 | 0.0 | 0.567051 | 1.000000 | 0.229606 | 0.354556 | 0.481098 | 0.068046 | 0.166335 | 0.467967 |
1970 | 0.0 | 1.000000 | 0.839023 | 0.271083 | 0.264752 | 0.206195 | 0.148994 | 0.121411 | 0.555505 |
1980 | 0.0 | 0.877438 | 0.946007 | 0.268733 | 0.287826 | 0.156231 | 0.250462 | 0.099569 | 1.000000 |
1990 | 0.0 | 0.816222 | 1.000000 | 0.208229 | 0.418430 | 0.176811 | 0.106744 | 0.078743 | 0.528329 |
2000 | 0.0 | 1.000000 | 0.964953 | 0.528371 | 0.265020 | 0.223465 | 0.289386 | 0.079940 | 0.463117 |
2010 | 0.0 | 0.748342 | 1.000000 | 0.519065 | 0.374503 | 0.152023 | 0.190650 | 0.188992 | 0.539954 |
booksDiaDF['decade'] = booksDiaDF.index
booksMelted = booksDiaDF.melt(id_vars='decade', var_name='book')
# cut out erroneous "book 0" material (i.e. title page)
booksMelted = booksMelted[booksMelted.book != 0]
alt.Chart(booksMelted,\
title="Middlemarch quotations per book, per decade (weighted by length of quotation and normalized by decade)")\
.mark_rect().encode(x=alt.X('book', type='ordinal',
axis=alt.Axis(labelAngle=0)),
y=alt.Y('decade', type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Amount Quoted"))).properties(width=500, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
labelFontSize=14,
titleFontSize=14
)
# Raw quotation counts (not weighted by the number of words in the quotation or normalized by decade)
# Turning on useWordcounts makes it so that it's weighted by wordcount. Turning it off uses raw numbers of quotations.
diaDFquoteOnly = diachronicAnalysis(df, decades=(1960, 2020), bins=chapterLocations, useWordcounts=False, normalize=False).sort_index()
diaDFquoteOnly.columns.name ='chapter'
diaDFquoteOnly.index.name = 'decade'
with pd.option_context("display.min_rows", 6, "display.max_rows", 100, \
"display.max_columns", 90, 'display.max_colwidth', 150):
display(diaDFquoteOnly)
chapter | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
decade | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
1960 | 8 | 7 | 5 | 4 | 2 | 3 | 0 | 6 | 1 | 6 | 8 | 3 | 2 | 0 | 0 | 8 | 3 | 3 | 0 | 1 | 20 | 11 | 3 | 2 | 1 | 1 | 1 | 7 | 3 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 2 | 4 | 0 | 2 | 3 | 6 | 1 | 2 | 0 | 0 | 7 | 3 | 1 | 0 | 4 | 1 | 2 | 3 | 1 | 0 | 3 | 0 | 1 | 0 | 3 | 1 | 0 | 1 | 2 | 0 | 2 | 0 | 1 | 0 | 0 | 6 | 0 | 1 | 0 | 4 | 6 | 2 | 6 | 1 | 4 | 12 | 1 | 2 | 0 | 0 | 6 | 9 |
1970 | 20 | 26 | 20 | 24 | 8 | 4 | 11 | 8 | 5 | 8 | 7 | 8 | 2 | 2 | 1 | 21 | 14 | 1 | 3 | 9 | 27 | 8 | 11 | 0 | 3 | 1 | 3 | 14 | 12 | 3 | 1 | 3 | 0 | 0 | 1 | 0 | 2 | 11 | 1 | 4 | 2 | 3 | 12 | 9 | 1 | 4 | 6 | 1 | 5 | 0 | 4 | 1 | 1 | 1 | 4 | 2 | 5 | 2 | 8 | 1 | 3 | 2 | 1 | 1 | 3 | 1 | 0 | 0 | 2 | 3 | 1 | 3 | 3 | 2 | 7 | 1 | 4 | 1 | 1 | 1 | 10 | 9 | 0 | 4 | 1 | 0 | 3 | 22 |
1980 | 34 | 41 | 13 | 21 | 4 | 10 | 4 | 8 | 0 | 13 | 17 | 9 | 9 | 9 | 4 | 38 | 17 | 2 | 6 | 18 | 37 | 9 | 5 | 11 | 2 | 0 | 2 | 8 | 7 | 12 | 3 | 4 | 1 | 0 | 6 | 1 | 4 | 20 | 0 | 9 | 0 | 0 | 7 | 7 | 0 | 2 | 1 | 10 | 1 | 0 | 3 | 5 | 0 | 0 | 10 | 4 | 4 | 0 | 11 | 2 | 2 | 6 | 5 | 1 | 9 | 4 | 2 | 1 | 0 | 2 | 0 | 3 | 9 | 2 | 3 | 2 | 15 | 6 | 4 | 0 | 17 | 13 | 1 | 7 | 0 | 4 | 3 | 31 |
1990 | 38 | 38 | 20 | 23 | 5 | 14 | 13 | 10 | 1 | 14 | 24 | 21 | 14 | 9 | 4 | 66 | 11 | 10 | 11 | 34 | 41 | 15 | 12 | 1 | 6 | 4 | 6 | 27 | 5 | 8 | 1 | 6 | 1 | 4 | 5 | 7 | 18 | 16 | 1 | 7 | 0 | 10 | 21 | 2 | 1 | 19 | 8 | 5 | 11 | 0 | 14 | 1 | 2 | 2 | 2 | 4 | 3 | 1 | 10 | 3 | 4 | 7 | 5 | 6 | 3 | 5 | 1 | 2 | 3 | 0 | 6 | 4 | 6 | 1 | 4 | 3 | 10 | 2 | 13 | 2 | 17 | 20 | 2 | 15 | 3 | 0 | 3 | 33 |
2000 | 28 | 30 | 19 | 23 | 12 | 8 | 7 | 8 | 2 | 26 | 22 | 7 | 4 | 4 | 1 | 27 | 26 | 2 | 4 | 26 | 45 | 12 | 23 | 9 | 7 | 3 | 2 | 7 | 4 | 5 | 5 | 4 | 4 | 10 | 0 | 3 | 4 | 18 | 2 | 8 | 6 | 2 | 15 | 6 | 1 | 9 | 1 | 1 | 9 | 3 | 10 | 1 | 1 | 5 | 10 | 4 | 7 | 5 | 4 | 1 | 8 | 8 | 3 | 5 | 0 | 0 | 2 | 0 | 3 | 7 | 3 | 3 | 5 | 1 | 0 | 4 | 9 | 2 | 4 | 2 | 6 | 5 | 0 | 5 | 2 | 0 | 6 | 29 |
2010 | 21 | 53 | 12 | 13 | 9 | 5 | 10 | 4 | 2 | 17 | 17 | 7 | 7 | 2 | 1 | 32 | 15 | 4 | 10 | 24 | 39 | 21 | 15 | 7 | 20 | 2 | 0 | 20 | 22 | 5 | 1 | 7 | 2 | 0 | 2 | 2 | 10 | 10 | 1 | 12 | 5 | 4 | 8 | 6 | 0 | 4 | 2 | 0 | 8 | 0 | 5 | 8 | 0 | 7 | 1 | 6 | 7 | 0 | 15 | 0 | 7 | 8 | 2 | 7 | 5 | 0 | 1 | 2 | 2 | 1 | 4 | 11 | 4 | 3 | 7 | 3 | 3 | 4 | 0 | 0 | 15 | 11 | 1 | 7 | 0 | 0 | 2 | 51 |
diaDFquoteOnly['decade'] = diaDFquoteOnly.index
diaDFquoteOnlyMelted = diaDFquoteOnly.melt(id_vars='decade')
#Chart with raw quotations
alt.Chart(diaDFquoteOnlyMelted, title="Middlemarch quotations per chapter, per decade (not weighted or normalized)")\
.mark_rect().encode(x=alt.X('chapter', title="Chapter", type='ordinal',
axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
y=alt.Y('decade', title="Decade", type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Number of Quotations")))\
.properties(width=1000, height=300).configure_axis(
labelFontSize=14,
titleFontSize=14
).configure_legend(
titleFontSize=14,
labelFontSize=14
)
#Chart with raw quotations, transposed
alt.Chart(diaDFquoteOnlyMelted, title="Middlemarch quotations per chapter, per decade (not weighted or normalized)")\
.mark_rect().encode(x=alt.X('decade',title="Decade", type='ordinal', sort='ascending', axis=alt.Axis(labelAngle=0,
labelExpr='datum.value + "s"')),
y=alt.Y('chapter', title="Chapter", type='ordinal', sort='descending', axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
color=alt.Color('value', legend=alt.Legend(title="Number of Quotations")))\
.properties(width=300, height=1000).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
# Weighted by wordcount (by the number of words in the quotation) and normalized by decade (counts are scaled to the maximum value per decade)
diaDF = diachronicAnalysis(df, decades=(1960, 2020), bins=chapterLocations, useWordcounts=True, normalize=True).sort_index()
diaDF.columns.name = 'chapter'
diaDF.index.name = 'decade'
with pd.option_context("display.min_rows", 6, "display.max_rows", 100, \
"display.max_columns", 90, 'display.max_colwidth', 150):
display(diaDF)
chapter | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
decade | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
1960 | 0.119514 | 0.119514 | 0.266106 | 0.068161 | 0.012138 | 0.022409 | 0.000000 | 0.104575 | 0.008403 | 0.267974 | 0.230626 | 0.036415 | 0.074697 | 0.000000 | 0.000000 | 0.621849 | 0.272642 | 0.020542 | 0.000000 | 0.005602 | 1.000000 | 0.372549 | 0.053221 | 0.019608 | 0.013072 | 0.005602 | 0.004669 | 0.183940 | 0.298786 | 0.000000 | 0.006536 | 0.006536 | 0.000000 | 0.000000 | 0.000000 | 0.106443 | 0.048553 | 0.156863 | 0.000000 | 0.030812 | 0.045752 | 0.320261 | 0.123249 | 0.024276 | 0.000000 | 0.000000 | 0.326797 | 0.047619 | 0.026144 | 0.000000 | 0.228758 | 0.007470 | 0.053221 | 0.414566 | 0.002801 | 0.000000 | 0.102708 | 0.000000 | 0.015873 | 0.000000 | 0.014006 | 0.024276 | 0.000000 | 0.020542 | 0.022409 | 0.000000 | 0.074697 | 0.000000 | 0.007470 | 0.000000 | 0.000000 | 0.265173 | 0.000000 | 0.004669 | 0.000000 | 0.014939 | 0.130719 | 0.017740 | 0.161531 | 0.039216 | 0.183007 | 0.225957 | 0.003735 | 0.065359 | 0.000000 | 0.0 | 0.022409 | 0.228758 |
1970 | 0.224055 | 0.575258 | 0.584192 | 0.393814 | 0.107904 | 0.061856 | 0.360825 | 0.115464 | 0.158076 | 0.087973 | 0.079725 | 0.226804 | 0.063918 | 0.004124 | 0.002749 | 0.627491 | 0.340893 | 0.004811 | 0.012371 | 0.219244 | 1.000000 | 0.134021 | 0.204811 | 0.000000 | 0.061168 | 0.002062 | 0.009622 | 0.381443 | 0.229553 | 0.094845 | 0.003436 | 0.041924 | 0.000000 | 0.000000 | 0.032990 | 0.000000 | 0.062543 | 0.188316 | 0.007560 | 0.058419 | 0.018557 | 0.107216 | 0.329210 | 0.197938 | 0.004124 | 0.031615 | 0.074914 | 0.013058 | 0.151890 | 0.000000 | 0.071478 | 0.004124 | 0.018557 | 0.059107 | 0.087285 | 0.065979 | 0.103093 | 0.012371 | 0.121649 | 0.003436 | 0.009622 | 0.046048 | 0.003436 | 0.017869 | 0.226804 | 0.004811 | 0.000000 | 0.000000 | 0.070790 | 0.021306 | 0.009622 | 0.017869 | 0.051546 | 0.023368 | 0.523024 | 0.002749 | 0.131959 | 0.035052 | 0.010309 | 0.013058 | 0.327148 | 0.111340 | 0.000000 | 0.037113 | 0.004811 | 0.0 | 0.008247 | 0.408935 |
1980 | 0.437037 | 0.772391 | 0.164310 | 0.377778 | 0.032997 | 0.249832 | 0.106397 | 0.175758 | 0.000000 | 0.105051 | 0.212121 | 0.160269 | 0.084175 | 0.092256 | 0.061953 | 0.801347 | 0.377104 | 0.035690 | 0.099663 | 0.400673 | 0.918519 | 0.242424 | 0.073401 | 0.235690 | 0.010774 | 0.000000 | 0.006061 | 0.151515 | 0.084848 | 0.296970 | 0.047138 | 0.022222 | 0.026263 | 0.000000 | 0.064646 | 0.041751 | 0.042424 | 0.253872 | 0.000000 | 0.415488 | 0.000000 | 0.000000 | 0.125926 | 0.127273 | 0.000000 | 0.008081 | 0.004040 | 0.191919 | 0.006061 | 0.000000 | 0.098990 | 0.076094 | 0.000000 | 0.000000 | 0.151515 | 0.152189 | 0.019529 | 0.000000 | 0.204040 | 0.028283 | 0.032323 | 0.191919 | 0.041751 | 0.012795 | 0.129293 | 0.055219 | 0.006734 | 0.003367 | 0.000000 | 0.030976 | 0.000000 | 0.088215 | 0.144108 | 0.245791 | 0.058586 | 0.024242 | 0.292929 | 0.103704 | 0.064646 | 0.000000 | 0.360269 | 0.303030 | 0.010101 | 0.085522 | 0.000000 | 1.0 | 0.015488 | 0.571717 |
1990 | 0.514066 | 0.436061 | 0.150043 | 0.271952 | 0.046462 | 0.135550 | 0.196505 | 0.068201 | 0.013640 | 0.075021 | 0.249361 | 0.264706 | 0.227195 | 0.072890 | 0.033674 | 1.000000 | 0.237425 | 0.109122 | 0.113811 | 0.511509 | 0.777494 | 0.290708 | 0.093777 | 0.008525 | 0.073316 | 0.028986 | 0.060102 | 0.259165 | 0.084825 | 0.064791 | 0.001705 | 0.034527 | 0.002131 | 0.057118 | 0.193095 | 0.201194 | 0.185848 | 0.176897 | 0.003410 | 0.078005 | 0.000000 | 0.162830 | 0.355499 | 0.014493 | 0.002131 | 0.151321 | 0.026002 | 0.057545 | 0.164535 | 0.000000 | 0.121910 | 0.016198 | 0.004689 | 0.014493 | 0.017903 | 0.034101 | 0.018329 | 0.001705 | 0.082268 | 0.038363 | 0.027280 | 0.092498 | 0.033674 | 0.020034 | 0.005968 | 0.057971 | 0.002558 | 0.004689 | 0.029838 | 0.000000 | 0.069480 | 0.064791 | 0.065217 | 0.022592 | 0.060529 | 0.054135 | 0.110401 | 0.048167 | 0.126598 | 0.043905 | 0.298380 | 0.255754 | 0.022592 | 0.217818 | 0.013214 | 0.0 | 0.009804 | 0.364024 |
2000 | 0.452062 | 0.739689 | 0.176754 | 0.371184 | 0.052491 | 0.179968 | 0.151044 | 0.069095 | 0.009641 | 0.302625 | 0.400107 | 0.269416 | 0.035351 | 0.028923 | 0.006427 | 0.470809 | 0.305838 | 0.020889 | 0.101768 | 0.539904 | 1.000000 | 0.146224 | 0.476165 | 0.212641 | 0.217461 | 0.061061 | 0.006963 | 0.111944 | 0.252276 | 0.194965 | 0.062132 | 0.055169 | 0.089984 | 0.431173 | 0.000000 | 0.027317 | 0.023032 | 0.258168 | 0.018211 | 0.103910 | 0.088913 | 0.044992 | 0.286020 | 0.084092 | 0.002142 | 0.121585 | 0.008570 | 0.002142 | 0.167649 | 0.045528 | 0.118372 | 0.003749 | 0.063739 | 0.099625 | 0.133905 | 0.152651 | 0.151580 | 0.046599 | 0.065345 | 0.016069 | 0.023032 | 0.257097 | 0.082485 | 0.042849 | 0.000000 | 0.000000 | 0.025174 | 0.000000 | 0.073380 | 0.041778 | 0.047134 | 0.026245 | 0.046599 | 0.005892 | 0.000000 | 0.059454 | 0.333690 | 0.069630 | 0.065345 | 0.026781 | 0.246920 | 0.042314 | 0.000000 | 0.051955 | 0.021960 | 0.0 | 0.035886 | 0.479914 |
2010 | 0.212549 | 0.984737 | 0.198417 | 0.158282 | 0.049180 | 0.044658 | 0.136235 | 0.038440 | 0.016959 | 0.239118 | 0.295647 | 0.077445 | 0.100057 | 0.022046 | 0.010741 | 0.613906 | 0.273036 | 0.039570 | 0.159412 | 0.349915 | 0.733748 | 1.000000 | 0.207462 | 0.033352 | 0.662521 | 0.023742 | 0.000000 | 0.348785 | 0.446580 | 0.118146 | 0.002826 | 0.105144 | 0.028830 | 0.000000 | 0.033917 | 0.044658 | 0.186546 | 0.163934 | 0.009045 | 0.356699 | 0.183154 | 0.196721 | 0.102318 | 0.066704 | 0.000000 | 0.054268 | 0.065574 | 0.000000 | 0.055964 | 0.000000 | 0.108536 | 0.109666 | 0.000000 | 0.057660 | 0.003957 | 0.079141 | 0.107971 | 0.000000 | 0.234596 | 0.000000 | 0.148672 | 0.053703 | 0.022046 | 0.114754 | 0.057660 | 0.000000 | 0.005653 | 0.026003 | 0.029960 | 0.010175 | 0.207462 | 0.192764 | 0.041266 | 0.022046 | 0.209158 | 0.018089 | 0.107405 | 0.028265 | 0.000000 | 0.000000 | 0.287733 | 0.192764 | 0.044658 | 0.079706 | 0.000000 | 0.0 | 0.004522 | 0.805540 |
diaDF['decade'] = diaDF.index
diaMelted = diaDF.melt(id_vars='decade')
Because our corpus contains varying numbers of JSTOR texts per decade, we've decided to also weight each quotation by its length and normalize within each decade.
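Concretely, the normalization step inside diachronicAnalysis divides each decade's row by that row's maximum (decadesDF.div(decadesDF.max(axis=1), axis=0)), so values are comparable across decades despite the differing corpus sizes. A toy illustration:
# Toy illustration of the per-decade normalization used in diachronicAnalysis:
# each row (decade) is divided by its own maximum.
toy = pd.DataFrame({'ch_A': [10, 100], 'ch_B': [5, 25]}, index=[1960, 1970])
toy.div(toy.max(axis=1), axis=0)
#        ch_A  ch_B
# 1960   1.0   0.50
# 1970   1.0   0.25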
diachronic_chap = alt.Chart(diaMelted, title="Middlemarch quotations per chapter, per decade (normalized by decade and weighted by word count)")\
.mark_rect().encode(x=alt.X('chapter', title="Chapter", type='ordinal', axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Amount Quoted")))\
.properties(width=1000, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
diachronic_chap
alt.Chart(diaMelted, )\
.mark_rect().encode(x=alt.X('chapter', title="Chapter", type='ordinal', axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Amount Quoted")))\
.properties(width=1000, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
).save('Figure-5.png', ppi=300)
top5 = diaDFquoteOnlyMelted["chapter"].where(diaDFquoteOnlyMelted["chapter"].isin([0, 1, 15, 20, 87]), other="Other")
diaDFquoteOnlyMelted['top5'] = top5
# Chapters outside the top five are drawn in a uniform light grey.
color = alt.value('gainsboro')
line = alt.Chart(diaDFquoteOnlyMelted.loc[diaDFquoteOnlyMelted['chapter'].isin([0, 1, 15, 20, 87])], title="Middlemarch top 5 most quoted chapters, by decade (not weighted or normalized)")\
.mark_line().encode(
x=alt.X('decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value + "s"')),
y=alt.Y('value:Q', title="Number of Quotations", axis=alt.Axis(labelAngle=0)),
color=alt.Color('chapter:O', scale=alt.Scale(scheme='greys'), legend=None,),
)
points = line.mark_point(filled=True).encode(
color=alt.Color('chapter:O', scale=alt.Scale(scheme='greys')),
shape=alt.Shape('chapter:O', legend=alt.Legend(title="Chapter"), scale=alt.Scale(range=['square', 'circle', 'cross','triangle-right', 'diamond'])),
size=alt.Size('chapter', legend=None, scale=alt.Scale(range=[200,200],domain=['0', '1', '15', '20', '87']))
)
greyed = alt.Chart(diaDFquoteOnlyMelted.loc[~diaDFquoteOnlyMelted['chapter'].isin([0, 1, 15, 20, 87])])\
.mark_line().encode(
x=alt.X('decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value + "s"')),
y=alt.Y('value:Q', title="Number of Quotations", axis=alt.Axis(labelAngle=0)),
color=color,
)
top5_chart = alt.layer(
greyed,
line,
points,
).resolve_scale(
color='independent',
shape='independent'
).properties(width=400).configure_legend(
titleFontSize=11,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
#top5_chart.save('Figure-6.png', ppi=300)
top5_chart#.save('Figure-6.png', ppi=300)
# Chapters outside the top five are drawn in a uniform light grey.
color = alt.value('gainsboro')
line = alt.Chart(diaDFquoteOnlyMelted.loc[diaDFquoteOnlyMelted['chapter'].isin([0, 1, 15, 20, 87])], title="Middlemarch top 5 most quoted chapters, by decade (not weighted or normalized)")\
.mark_line().encode(
x=alt.X('decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value + "s"')),
y=alt.Y('value:Q', title="Number of Quotations", axis=alt.Axis(labelAngle=0)),
color=alt.Color('chapter:O', scale=alt.Scale(scheme='category20'), legend=None,),
)
points = line.mark_point(filled=True).encode(
color=alt.Color('chapter:O', scale=alt.Scale(scheme='category20')),
shape=alt.Shape('chapter:O', legend=alt.Legend(title="Chapter"), scale=alt.Scale(range=['square', 'circle', 'cross','triangle-right', 'diamond'])),
size=alt.Size('chapter', legend=None, scale=alt.Scale(range=[200,200],domain=['0', '1', '15', '20', '87']))
)
greyed = alt.Chart(diaDFquoteOnlyMelted.loc[~diaDFquoteOnlyMelted['chapter'].isin([0, 1, 15, 20, 87])])\
.mark_line().encode(
x=alt.X('decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value + "s"')),
y=alt.Y('value:Q', title="Number of Quotations", axis=alt.Axis(labelAngle=0)),
color=color,
)
top5_chart_color = alt.layer(
greyed,
line,
points,
).resolve_scale(
color='independent',
shape='independent'
).properties(width=400).configure_legend(
titleFontSize=11,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
top5_chart_color
color = alt.condition(alt.datum.top5 == 'top5',
alt.Color('chapter:O', legend=None),
alt.value('gainsboro')
)
line = alt.Chart(diaMelted.loc[diaMelted['chapter'].isin([0, 1, 15, 20, 87])], title="Middlemarch top 5 most quoted chapters, by decade (normalized and weighted)")\
.mark_line().encode(
x=alt.X('decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value + "s"')),
y=alt.Y('value:Q', title="Amount Quoted", axis=alt.Axis(labelAngle=0)),
color=alt.Color('chapter:O', scale=alt.Scale(scheme='greys'), legend=None,),
)
points = line.mark_point(filled=True).encode(
color=alt.Color('chapter:O', scale=alt.Scale(scheme='greys')),
shape=alt.Shape('chapter:O', legend=alt.Legend(title="Chapter"), scale=alt.Scale(range=['square', 'circle', 'cross','triangle-right', 'diamond'])),
size=alt.Size('chapter', legend=None, scale=alt.Scale(range=[200,200],domain=['0', '1', '15', '20', '87']))
)
greyed = alt.Chart(diaMelted.loc[~diaMelted['chapter'].isin([0, 1, 15, 20, 87])])\
.mark_line().encode(
x=alt.X('decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value + "s"')),
y=alt.Y('value:Q', title="Amount Quoted", axis=alt.Axis(labelAngle=0)),
color=color,
)
top5_chart_normalized = alt.layer(
greyed,
line,
points,
).resolve_scale(
color='independent',
shape='independent'
).properties(width=400).configure_legend(
titleFontSize=11,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
top5_chart_normalized
color = alt.condition(alt.datum.top5 == 'top5',
alt.Color('chapter:O', legend=None),
alt.value('gainsboro')
)
line = alt.Chart(diaMelted.loc[diaMelted['chapter'].isin([0, 1, 15, 20, 87])], title="Middlemarch top 5 most quoted chapters, by decade (normalized and weighted)")\
.mark_line().encode(
x=alt.X('decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value + "s"')),
y=alt.Y('value:Q', title="Amount Quoted", axis=alt.Axis(labelAngle=0)),
color=alt.Color('chapter:O', scale=alt.Scale(scheme='category20'), legend=None,),
)
points = line.mark_point(filled=True).encode(
color=alt.Color('chapter:O', scale=alt.Scale(scheme='category20')),
shape=alt.Shape('chapter:O', legend=alt.Legend(title="Chapter"), scale=alt.Scale(range=['square', 'circle', 'cross','triangle-right', 'diamond'])),
size=alt.Size('chapter', legend=None, scale=alt.Scale(range=[200,200],domain=['0', '1', '15', '20', '87']))
)
greyed = alt.Chart(diaMelted.loc[~diaMelted['chapter'].isin([0, 1, 15, 20, 87])])\
.mark_line().encode(
x=alt.X('decade', title="Decade",type='ordinal', sort='ascending',
axis=alt.Axis(labelAngle=0, labelExpr='datum.value + "s"')),
y=alt.Y('value:Q', title="Amount Quoted", axis=alt.Axis(labelAngle=0)),
color=color,
)
top5_chart_normalized_color = alt.layer(
greyed,
line,
points,
).resolve_scale(
color='independent',
shape='independent'
).properties(width=400).configure_legend(
titleFontSize=11,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
top5_chart_normalized_color
# Get the normalized proportion of, say, Chapter 20 in 1950:
diachronicAnalysis(df)[20][1950]
0.6318681318681318
# Try to find out why Ch. 15 was quoted so heavily in the 1980s and 1990s.
chap15s = []
ids = []
for i, row in df.iterrows():
    locations = row['Locations in A']
    starts = [item[0] for item in locations]
    if row['Decade'] in [1980, 1990]: # Looking at the 1980s, 1990s
        for start in starts:
            if start > 290371 and start < 322052: # Does it cite Chapter XV?
                if row.id not in ids:
                    chap15s.append(row)
                    ids.append(row.id)
# Get the titles of those articles.
print('Titles of articles that quote Chapter 15:')
[item.title for item in chap15s]
Titles of articles that quote Chapter 15:
['Woman of Maxims:', 'Brava! And Farewell to Greatheart', 'The Union of "Miss Brooke" and "Middlemarch": A Study of the Manuscript', '"Middlemarch" and George Eliot\'s Female (Re) Vision of Shakespeare', 'Heroism and Organicism in the Case of Lydgate', 'Professional Judgment and the Rationing of Medical Care', 'SILENCE, GESTURE, AND MEANING IN "MIDDLEMARCH"', 'Reflections on "The Philosophical Bases of Feminist Literary Criticisms"', 'Strategies for Writing: Theories and Practices', 'Review Article', 'AN END TO CONVERTING PATIENTS\' STOMACHS INTO DRUG-SHOPS: LYDGATE\'S NEW METHOD OF CHARGING HIS PATIENTS IN "MIDDLEMARCH"', 'Review Article', 'Illuminating the Vision of Ordinary Life: A Tribute to "Middlemarch"', 'Review Article', "PLEXUSES AND GANGLIA: ELIOTS AND LEWES'S THEORY OF NERVE-CONSCIOUSNESS", 'Review Article', 'Middlemarch, Realism and the Birth of the Clinic', 'ERZÄHLERISCHE OBJEKTIVITÄT, ,AUTHORIAL INTRUSIONS‘ UND ENGLISCHER REALISMUS', 'Review Article', 'The Aesthetics of Sympathy:', 'NARRATIVE VOICE AND THE "FEMININE" NOVELIST: DINAH MULOCK AND GEORGE ELIOT', 'Lamarque and Olsen on Literature and Truth', 'Review Article', 'Microscopy and Semiotic in Middlemarch', "George Eliot's Reflexive Text: Three Tonalities in the Narrative Voice of Middlemarch", 'Review Article', 'George Eliot and the Eighteenth-Century Novel', 'Versions of Narrative: Overt and Covert Narrators in Nineteenth Century Historiography', 'LYDGATE\'S RESEARCH PROJECT IN "MIDDLEMARCH"', 'George Eliot\'s Scrupulous Research: The Facts behind Eliot\'s Use of the "Keepsake in Middlemarch"', 'Eliot and Woolf as Historians of the Common Life', 'The Language of Discovery: William Whewell and George Eliot', "George Eliot's Hypothesis of Reality", 'Re-Reading Character', 'The Strange Case of Monomania: Patriarchy in Literature, Murder in Middlemarch, Drowning in Daniel Deronda', '"Wrinkled Deep in Time": The Alexandria Quartet as Many-Layered Palimpsest', 'THE DIALOGIC UNIVERSE OF "MIDDLEMARCH"', 'MIXED AND ERRING HUMANITY: GEORGE ELIOT, G. H. LEWES AND GOETHE', '1978 And All That', "The Turn of George Eliot's Realism", 'Dangerous Crossings: Dickens, Digression, and Montage', 'In Defence of Research for Evidence-Based Teaching: A Rejoinder to Martyn Hammersley', 'Review Article', 'THE WONDROUS MARRIAGES OF "DANIEL DERONDA:" GENDER, WORK, AND LOVE', "The Victorian Discourse of Gambling: Speculations on Middlemarch and the Duke's Children", 'Struggling for Medical Reform in Middlemarch', 'Steamboat Surfacing: Scott and the English Novelists']
len(chap15s)
47
xvStart, xvEnd = chapterLocations[15:17]
print(mm[xvStart:xvStart+1000])
CHAPTER XV. "Black eyes you have left, you say, Blue eyes fail to draw you; Yet you seem more rapt to-day, Than of old we saw you. "Oh, I track the fairest fair Through new haunts of pleasure; Footprints here and echoes there Guide me to my treasure: "Lo! she turns--immortal youth Wrought to mortal stature, Fresh as starlight's aged truth-- Many-named Nature!" A great historian, as he insisted on calling himself, who had the happiness to be dead a hundred and twenty years ago, and so to take his place among the colossi whose huge legs our living pettiness is observed to walk under, glories in his copious remarks and digressions as the least imitable part of his work, and especially in those initial chapters to the successive books of his history, where he seems to bring his armchair to the proscenium and chat with us in all the lusty ease of his fine English. But Fielding lived when the days were longer (for time, like mone
# Try to find out which articles cite the first 2/3 of Chapter XV (Lydgate's scientific research)
# versus the last 1/3 (the story of Laure)
chap15p1s = []
ids = []
for i, row in df.iterrows():
    locations = row['Locations in A']
    starts = [item[0] for item in locations]
    if row['Decade'] in [1980, 1990]:
        for start in starts:
            if start > 290371 and start < 313892: # Does it cite the first 2/3 of Chapter XV?
                if row.id not in ids:
                    chap15p1s.append(row)
                    ids.append(row.id)
chap15p2s = []
ids = []
for i, row in df.iterrows():
    locations = row['Locations in A']
    starts = [item[0] for item in locations]
    if row['Decade'] in [1980, 1990]:
        for start in starts:
            if start > 313892 and start < 322052: # Does it cite the last 1/3 of Chapter XV?
                if row.id not in ids:
                    chap15p2s.append(row)
                    ids.append(row.id)
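The two loops above, and the similar ones that follow, all share one pattern: scan each article's match locations and keep the articles whose quotations begin inside a given character span. As a minimal sketch (not part of the original analysis code), that pattern could be factored into a single helper; the function name articlesQuotingSpan is hypothetical, and it assumes only the 'Locations in A', 'Decade', and 'id' columns used above.
def articlesQuotingSpan(df, start, end, decades=None):
    """Return one row per article whose quotations begin inside (start, end),
    optionally restricted to a list of decades. A sketch of the loop pattern above."""
    hits, seen = [], set()
    for i, row in df.iterrows():
        if decades is not None and row['Decade'] not in decades:
            continue
        for matchStart, matchEnd in row['Locations in A']:
            if start < matchStart < end and row.id not in seen:
                hits.append(row)
                seen.add(row.id)
    return hits

# For example, the 1980s-90s articles quoting anywhere in Chapter XV:
# chap15s = articlesQuotingSpan(df, 290371, 322052, decades=[1980, 1990])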
# Get the titles of articles citing the first 2/3
print('Titles of articles that quote the first 2/3 of Chapter 15:')
[item.title for item in chap15p1s]
Titles of articles that quote the first 2/3 of Chapter 15:
['Woman of Maxims:', 'Brava! And Farewell to Greatheart', 'The Union of "Miss Brooke" and "Middlemarch": A Study of the Manuscript', 'Heroism and Organicism in the Case of Lydgate', 'Professional Judgment and the Rationing of Medical Care', 'SILENCE, GESTURE, AND MEANING IN "MIDDLEMARCH"', 'Reflections on "The Philosophical Bases of Feminist Literary Criticisms"', 'Strategies for Writing: Theories and Practices', 'Review Article', 'AN END TO CONVERTING PATIENTS\' STOMACHS INTO DRUG-SHOPS: LYDGATE\'S NEW METHOD OF CHARGING HIS PATIENTS IN "MIDDLEMARCH"', 'Review Article', 'Illuminating the Vision of Ordinary Life: A Tribute to "Middlemarch"', 'Review Article', "PLEXUSES AND GANGLIA: ELIOTS AND LEWES'S THEORY OF NERVE-CONSCIOUSNESS", 'Review Article', 'Middlemarch, Realism and the Birth of the Clinic', 'ERZÄHLERISCHE OBJEKTIVITÄT, ,AUTHORIAL INTRUSIONS‘ UND ENGLISCHER REALISMUS', 'Review Article', 'The Aesthetics of Sympathy:', 'NARRATIVE VOICE AND THE "FEMININE" NOVELIST: DINAH MULOCK AND GEORGE ELIOT', 'Lamarque and Olsen on Literature and Truth', 'Review Article', 'Microscopy and Semiotic in Middlemarch', "George Eliot's Reflexive Text: Three Tonalities in the Narrative Voice of Middlemarch", 'Review Article', 'George Eliot and the Eighteenth-Century Novel', 'Versions of Narrative: Overt and Covert Narrators in Nineteenth Century Historiography', 'LYDGATE\'S RESEARCH PROJECT IN "MIDDLEMARCH"', 'George Eliot\'s Scrupulous Research: The Facts behind Eliot\'s Use of the "Keepsake in Middlemarch"', 'Eliot and Woolf as Historians of the Common Life', 'The Language of Discovery: William Whewell and George Eliot', "George Eliot's Hypothesis of Reality", '"Wrinkled Deep in Time": The Alexandria Quartet as Many-Layered Palimpsest', 'THE DIALOGIC UNIVERSE OF "MIDDLEMARCH"', 'MIXED AND ERRING HUMANITY: GEORGE ELIOT, G. H. LEWES AND GOETHE', '1978 And All That', "The Turn of George Eliot's Realism", 'Dangerous Crossings: Dickens, Digression, and Montage', 'In Defence of Research for Evidence-Based Teaching: A Rejoinder to Martyn Hammersley', 'Review Article', 'THE WONDROUS MARRIAGES OF "DANIEL DERONDA:" GENDER, WORK, AND LOVE', "The Victorian Discourse of Gambling: Speculations on Middlemarch and the Duke's Children", 'Struggling for Medical Reform in Middlemarch', 'Steamboat Surfacing: Scott and the English Novelists']
# Get the titles of those articles.
print('Titles of articles that quote the last 1/3 of Chapter 15:')
[item.title for item in chap15p2s]
Titles of articles that quote the last 1/3 of Chapter 15:
['The Union of "Miss Brooke" and "Middlemarch": A Study of the Manuscript', '"Middlemarch" and George Eliot\'s Female (Re) Vision of Shakespeare', 'Microscopy and Semiotic in Middlemarch', "George Eliot's Reflexive Text: Three Tonalities in the Narrative Voice of Middlemarch", 'Re-Reading Character', 'The Strange Case of Monomania: Patriarchy in Literature, Murder in Middlemarch, Drowning in Daniel Deronda']
# Verify that we have the right location for the start of Laure's story in the last 1/3 of Chapter XV
print(mm[313892:313892+1500])
As to women, he had once already been drawn headlong by impetuous folly, which he meant to be final, since marriage at some distant period would of course not be impetuous. For those who want to be acquainted with Lydgate it will be good to know what was that case of impetuous folly, for it may stand as an example of the fitful swerving of passion to which he was prone, together with the chivalrous kindness which helped to make him morally lovable. The story can be told without many words. It happened when he was studying in Paris, and just at the time when, over and above his other work, he was occupied with some galvanic experiments. One evening, tired with his experimenting, and not being able to elicit the facts he needed, he left his frogs and rabbits to some repose under their trying and mysterious dispensation of unexplained shocks, and went to finish his evening at the theatre of the Porte Saint Martin, where there was a melodrama which he had already seen several times; attracted, not by the ingenious work of the collaborating authors, but by an actress whose part it was to stab her lover, mistaking him for the evil-designing duke of the piece. Lydgate was in love with this actress, as a man is in love with a woman whom he never expects to speak to. She was a Provencale, with dark eyes, a Greek profile, and rounded majestic form, having that sort of beauty which carries a sweet matronliness even in youth, and her voice was a soft cooing. She had but lately c
# Verify the location of the epigraph and first paragraph
print(mm[290371:290371+1571])
CHAPTER XV. "Black eyes you have left, you say, Blue eyes fail to draw you; Yet you seem more rapt to-day, Than of old we saw you. "Oh, I track the fairest fair Through new haunts of pleasure; Footprints here and echoes there Guide me to my treasure: "Lo! she turns--immortal youth Wrought to mortal stature, Fresh as starlight's aged truth-- Many-named Nature!" A great historian, as he insisted on calling himself, who had the happiness to be dead a hundred and twenty years ago, and so to take his place among the colossi whose huge legs our living pettiness is observed to walk under, glories in his copious remarks and digressions as the least imitable part of his work, and especially in those initial chapters to the successive books of his history, where he seems to bring his armchair to the proscenium and chat with us in all the lusty ease of his fine English. But Fielding lived when the days were longer (for time, like money, is measured by our needs), when summer afternoons were spacious, and the clock ticked slowly in the winter evenings. We belated historians must not linger after his example; and if we did so, it is probable that our chat would be thin and eager, as if delivered from a campstool in a parrot-house. I at least have so much to do in unraveling certain human lots, and seeing how they were woven and interwoven, that all the light I can command must be concentrated on this particular web, and not dispersed over that tempting range of relevancies called the universe.
chap15para1s = []
ids = []
for i, row in df.iterrows():
    locations = row['Locations in A']
    starts = [item[0] for item in locations]
    if row['Decade'] in [1980, 1990]:
        for start in starts:
            if start > 290371 and start < 291943: # Does it cite the epigraph and first paragraph of Chapter XV?
                if row.id not in ids:
                    chap15para1s.append(row)
                    ids.append(row.id)
# Get the titles of articles that cite paragraph 1 of Chapter 15
print('Titles of articles that quote paragraph 1 of Chapter 15:')
[item.title for item in chap15para1s]
Titles of articles that quote paragraph 1 of Chapter 15:
['Woman of Maxims:', 'Brava! And Farewell to Greatheart', 'The Union of "Miss Brooke" and "Middlemarch": A Study of the Manuscript', 'Strategies for Writing: Theories and Practices', 'Illuminating the Vision of Ordinary Life: A Tribute to "Middlemarch"', 'Middlemarch, Realism and the Birth of the Clinic', 'NARRATIVE VOICE AND THE "FEMININE" NOVELIST: DINAH MULOCK AND GEORGE ELIOT', 'Review Article', 'Microscopy and Semiotic in Middlemarch', "George Eliot's Reflexive Text: Three Tonalities in the Narrative Voice of Middlemarch", 'George Eliot and the Eighteenth-Century Novel', 'Versions of Narrative: Overt and Covert Narrators in Nineteenth Century Historiography', 'George Eliot\'s Scrupulous Research: The Facts behind Eliot\'s Use of the "Keepsake in Middlemarch"', 'Eliot and Woolf as Historians of the Common Life', "George Eliot's Hypothesis of Reality", 'MIXED AND ERRING HUMANITY: GEORGE ELIOT, G. H. LEWES AND GOETHE', 'Dangerous Crossings: Dickens, Digression, and Montage', 'Steamboat Surfacing: Scott and the English Novelists']
chap15Lydgates = []
ids = []
for i, row in df.iterrows():
    locations = row['Locations in A']
    starts = [item[0] for item in locations]
    if row['Decade'] in [1980, 1990]:
        for start in starts:
            if start > 291942 and start < 313892: # Does it cite the Lydgate section of Chapter XV (after the first paragraph)?
                if row.id not in ids:
                    chap15Lydgates.append(row)
                    ids.append(row.id)
# Get the titles of articles that cite the Lydgate section
print('Titles of scholarly writings that quote the Lydgate section of Chapter 15:')
[item.title for item in chap15Lydgates]
Titles of scholarly writings that quote the Lydgate section of Chapter 15:
['The Union of "Miss Brooke" and "Middlemarch": A Study of the Manuscript', 'Heroism and Organicism in the Case of Lydgate', 'Professional Judgment and the Rationing of Medical Care', 'SILENCE, GESTURE, AND MEANING IN "MIDDLEMARCH"', 'Reflections on "The Philosophical Bases of Feminist Literary Criticisms"', 'Review Article', 'AN END TO CONVERTING PATIENTS\' STOMACHS INTO DRUG-SHOPS: LYDGATE\'S NEW METHOD OF CHARGING HIS PATIENTS IN "MIDDLEMARCH"', 'Review Article', 'Review Article', "PLEXUSES AND GANGLIA: ELIOTS AND LEWES'S THEORY OF NERVE-CONSCIOUSNESS", 'Review Article', 'Middlemarch, Realism and the Birth of the Clinic', 'ERZÄHLERISCHE OBJEKTIVITÄT, ,AUTHORIAL INTRUSIONS‘ UND ENGLISCHER REALISMUS', 'Review Article', 'The Aesthetics of Sympathy:', 'Lamarque and Olsen on Literature and Truth', 'Microscopy and Semiotic in Middlemarch', 'Review Article', 'LYDGATE\'S RESEARCH PROJECT IN "MIDDLEMARCH"', 'Eliot and Woolf as Historians of the Common Life', 'The Language of Discovery: William Whewell and George Eliot', '"Wrinkled Deep in Time": The Alexandria Quartet as Many-Layered Palimpsest', 'THE DIALOGIC UNIVERSE OF "MIDDLEMARCH"', '1978 And All That', "The Turn of George Eliot's Realism", 'Dangerous Crossings: Dickens, Digression, and Montage', 'In Defence of Research for Evidence-Based Teaching: A Rejoinder to Martyn Hammersley', 'Review Article', 'THE WONDROUS MARRIAGES OF "DANIEL DERONDA:" GENDER, WORK, AND LOVE', "The Victorian Discourse of Gambling: Speculations on Middlemarch and the Duke's Children", 'Struggling for Medical Reform in Middlemarch']
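For comparison with the 47 articles counted above for the whole chapter, the subsection lists can be tallied the same way (a brief sketch; the resulting counts are not reproduced here):
print('Epigraph and first paragraph:', len(chap15para1s))
print('Lydgate section:', len(chap15Lydgates))
print('Laure episode (last 1/3):', len(chap15p2s))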
# Try to find out which articles cite Chapter 20
chap20s = []
ids = []
for i, row in df.iterrows():
    locations = row['Locations in A']
    starts = [item[0] for item in locations]
    if row['Decade'] in [1870, 1900, 1910, 1920, 1930, 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010]:
        for start in starts:
            if start > 1236993 and start < 1278826: # Does it cite Chapter XX?
                if row.id not in ids:
                    chap20s.append(row)
                    ids.append(row.id)
# Get the titles of those articles.
print('Titles of scholarly writings that quote Chapter 20:')
[item.title for item in chap20s]
Titles of scholarly writings that quote Chapter 20:
['"Radiant as a Diamond": George Eliot, Jewelry and the Female Role', "The Hidden Abortion Plot in George Eliot's <em>Middlemarch</em>", 'George Eliot and the Feminine Gift', 'The Rosamond Plots', 'Afterword', 'Near Confinement: Pregnant Women in the Nineteenth-Century British Novel', 'Mencken, Cushing, and <em>The Life of Sir William Osler</em>', 'Egoism, Desires, and Friendship', 'Of Many Minds in <em>Middlemarch</em>', 'Teaching <em>Middlemarch</em> with a Focus on Theory of Mind', "The Power of Women's Hair in the Victorian Imagination", 'The Traffic in Men: Female Kinship in Three Novels by George Eliot', 'Transformations, Style, and the Writing Experience', '"Neutral Physiognomy": The Unreadable Faces of "Middlemarch"', 'The Web of Utterance: Middlemarch', 'Dora Spenlow, Female Communities, and Female Narrative in Charles Dickens\'s "David Copperfield" and George Eliot\'s "Middlemarch"', 'Ibsen and Some Current Superstitions', 'Realism as a Practical and Cosmic Joke', '"The One Poor Word" in "Middlemarch"', 'The abuses of literacy', 'Existentially Complete Abelian Lattice-Ordered Groups', 'Narrative and History', 'THE WRITER AND THE COMMISSARS', 'George Eliot\'s Scrupulous Research: The Facts behind Eliot\'s Use of the "Keepsake in Middlemarch"', 'Letter From England October 1995', 'Middlemarch and History', 'The Gendering of Habit in George Eliot\'s "Middlemarch"', 'ROSAMOND VINCY OF "MIDDLEMARCH"', '[Discussion in Four Parts]', 'F. R. Leavis Special Issue', "DISCERNING SYNTAX: GEORGE ELIOT'S RELATIVE CLAUSES", 'When George Eliot Reads Milton: The Muse in a Different Voice', "Character and Destiny in George Eliot's Fiction"]
# Number of articles that cite Chapter 20
len(chap20s)
33
xxStart, xxEnd = chapterLocations[20:22] # Chapter 20 Boundaries
print(mm[xxStart:xxStart+1000]) # Verify we have Ch. 20
CHAPTER XX. "A child forsaken, waking suddenly, Whose gaze afeard on all things round doth rove, And seeth only that it cannot see The meeting eyes of love." Two hours later, Dorothea was seated in an inner room or boudoir of a handsome apartment in the Via Sistina. I am sorry to add that she was sobbing bitterly, with such abandonment to this relief of an oppressed heart as a woman habitually controlled by pride on her own account and thoughtfulness for others will sometimes allow herself when she feels securely alone. And Mr. Casaubon was certain to remain away for some time at the Vatican. Yet Dorothea had no distinctly shapen grievance that she could state even to herself; and in the midst of her confused thought and passion, the mental act that was struggling forth into clearness was a self-accusing cry that her feeling of desolation was the fault of her own spiritual poverty. She had married the man of her choice, and with the advantage over most girls t
xx = mm[xxStart:xxEnd]
xxParaLocations = [match.start() for match in re.finditer('\n\n+', mm)]
xxParaLocations = [x for x in xxParaLocations if (x > xxStart) and (x < xxEnd)]
mm[xxParaLocations[4]:xxParaLocations[5]]
'\n\nBut this stupendous fragmentariness heightened the dreamlike\nstrangeness of her bridal life. Dorothea had now been five weeks in\nRome, and in the kindly mornings when autumn and winter seemed to go\nhand in hand like a happy aged couple one of whom would presently\nsurvive in chiller loneliness, she had driven about at first with Mr.\nCasaubon, but of late chiefly with Tantripp and their experienced\ncourier. She had been led through the best galleries, had been taken\nto the chief points of view, had been shown the grandest ruins and the\nmost glorious churches, and she had ended by oftenest choosing to drive\nout to the Campagna where she could feel alone with the earth and sky,\naway-from the oppressive masquerade of ages, in which her own life too\nseemed to become a masque with enigmatical costumes.'
articlesWithMatches['Locations in A'].loc[0]
[[130022, 130046]]
def inXX(matches):
    """ Determine if the article has a match in Ch. 20"""
    for match in matches:
        if match[0] > xxStart and match[0] < xxEnd:
            return True
    return False
articlesWithMatches['Locations in A'].apply(inXX).head()
0     False
9     False
17    False
19    False
21    False
Name: Locations in A, dtype: bool
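Summing this boolean Series gives a quick count of the articles with at least one match inside Chapter 20 (a one-line sketch; the result is not reproduced here):
articlesWithMatches['Locations in A'].apply(inXX).sum()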
# Try to find out which articles cite paragraph 6 of Chapter 20
chap20par6s = []
ids = []
for i, row in df.iterrows():
    locations = row['Locations in A']
    starts = [item[0] for item in locations]
    if row['Decade'] in [1870, 1900, 1910, 1920, 1930, 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010]:
        for start in starts:
            if start > 411152 and start < 412177: # Does it cite Chapter XX, paragraph 6?
                if row.id not in ids:
                    chap20par6s.append(row)
                    ids.append(row.id)
# Get the titles of those articles.
print('Titles of scholarly writings that quote paragraph 6 of Chapter 20:')
[item.title for item in chap20par6s]
Titles of scholarly writings that quote paragraph 6 of Chapter 20:
['“I Mistook the Faint Shadow”', 'Torpedoes, tapirs and tortoises: scientific discourse in "Middlemarch"', 'Sympathy Time: Adam Smith, George Eliot, and the Realist Novel', '“A True Prophet”? Speculation in Victorian Sensory Physiology and George Eliot’s “The Lifted Veil”', 'One-Way Communication', 'Review Article', 'A Note on Middlemarch', "Ian McEwan's Saturday and the Aesthetics of Prose", 'Responsibility without Consciousness', 'Proserpine and Pessimism: Goddesses of Death, Life, and Language from Swinburne to Wharton', 'Rights, Communities, and Tradition', 'Reading, Writing, and Eavesdropping: Some Thoughts on the Nature of Realistic Fiction', 'The Sound and the Fury: A Logic of Tragedy', 'Views from above and below: George Eliot and Fakir Mohan Senapati', 'Review Article', 'The Not-Quite Said', 'Review Article', 'Incarnation and Inwardness:', '"Be Ye Lukewarm!": The Nineteenth-Century Novel and Social Action', 'Development and the Learning Organisation: An Introduction', 'Lost in Magnification: Nineteenth-Century Microscopy and <em>The Lifted Veil</em>', 'As Sure as Shooting', 'Review Article', 'George Eliot and Wordsworth: The Power of Sound and the Power of Mind', 'Came Glancing like an Arrow', "My Tears See More Than My Eyes MY SON'S DEPRESSION AND THE POWER OF ART", "Incarnation, Inwardness, and Imagination: George Eliot's Early Fiction", "ENGLAND AND ENGLISHNESS: FORD'S FIRST TRILOGY", 'COMMONPLACE BOOK: Secrets', "T. S. Eliot's Quartets: A New Reading", 'Eliot, Proust, Stein', 'George Eliot and Greek Tragedy', "Woolf's Copernican Shift: Nonhuman Nature in Virginia Woolf's Short Fiction", 'The Abyss of Sympathy: the Conventions of Pathos in Eighteenth and Nineteenth Century British Novels', 'THE ECONOMIC PROBLEM OF SYMPATHY: PARABASIS, INTEREST, AND REALIST FORM IN "MIDDLEMARCH"', 'One-Way Communication', 'Gwendolen Harleth - Character Creation or Character Analysis?', "The Tramp of a Fly's Footstep: or, The Shriek, Rattle, and Roar of a Victorian Sound Track", 'Breathless', 'Dorothea and "Miss Brooke" in Middlemarch', "Louis Guilloux's Working Class Novels: Some Problems of Social Realism", '"Myriad-Headed, Myriad-Handed": Labor in "Middlemarch"', "The Squirrel's Heartbeat: Some Thoughts on the Later Style of Henry James", "Tolstoj's Reading of George Eliot: Visions and Revisions", 'Shifting from Stories to Live by to Stories to Leave by: Early Career Teacher Attrition', 'What Is Prosaics?', 'Sound Object Lessons', 'Exiling the Encyclopedia: The Individual in "Janet\'s Repentance"', 'Programs and Abstracts', 'A SHIFT IN THE ETHICS OF HARDY’S FICTION', 'The Divine Comedy of Language: Tennyson\'s "In Memoriam"', 'Review Article', 'GEORGE ELIOT: THE SIBYL OF MERCIA', '“The Continuity of Married Companionship”', 'Sympathy Biography and Sympathy Margin', 'In the Scene of Being', 'Review Article', 'Fiction as Vivisection: G. H. Lewes and George Eliot', 'Against Detachment', 'Review Article', 'Why Read George Eliot? Her novels are just modern enough—and just old-fashioned enough, too', 'The Language of Silence: A Citation', 'Forecasting Falls: Icarus from Freud to Auden to 9/11', '"THE OTHER SIDE OF SILENCE": KATHERINE ANNE PORTER\'S "HE" AS TRAGEDY', 'The Made Man and the “Minor” Novel: <em>Erewhon</em>, ANT, and Empire', 'Charles Darwin and the Victorian Pre-History of Climate Denial']
len(chap20par6s) # The number of items citing paragraph 6 in chapter 20
66
mm[411152:412177]
'\n\nNot that this inward amazement of Dorothea\'s was anything very\nexceptional: many souls in their young nudity are tumbled out among\nincongruities and left to "find their feet" among them, while their\nelders go about their business. Nor can I suppose that when Mrs.\nCasaubon is discovered in a fit of weeping six weeks after her wedding,\nthe situation will be regarded as tragic. Some discouragement, some\nfaintness of heart at the new real future which replaces the imaginary,\nis not unusual, and we do not expect people to be deeply moved by what\nis not unusual. That element of tragedy which lies in the very fact of\nfrequency, has not yet wrought itself into the coarse emotion of\nmankind; and perhaps our frames could hardly bear much of it. If we\nhad a keen vision and feeling of all ordinary human life, it would be\nlike hearing the grass grow and the squirrel\'s heart beat, and we\nshould die of that roar which lies on the other side of silence. As it\nis, the quickest of us walk about well wadded with stupidity'
def paraIndicesIn20(matches, paraLocations=xxParaLocations):
    """ Determine paragraph number (index) for match in Ch. 20. """
    paraIndices = []
    if inXX(matches):
        paraBoundaries = list(zip(paraLocations, paraLocations[1:]))
        for match in matches:
            for i, paraBoundary in enumerate(paraBoundaries):
                if set(range(match[0], match[1])) & set(range(paraBoundary[0], paraBoundary[1])): # find the set intersection of the ranges of pairs
                    paraIndices.append(i)
                else:
                    paraIndices.append(None)
    return paraIndices
len(set(range(8, 10)) & set(range(1, 9)))
1
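The set-intersection test above works, but for long character spans an equivalent interval-overlap check avoids building the sets entirely. A minimal sketch (the helper name spansOverlap is ours, not part of the original code):
def spansOverlap(a, b):
    """True if non-empty half-open spans a = (start, end) and b = (start, end) overlap;
    equivalent to bool(set(range(*a)) & set(range(*b))) without materializing the sets."""
    return a[0] < b[1] and b[0] < a[1]

spansOverlap((8, 10), (1, 9))  # True, matching the set-based check above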
articlesWithMatches['paraIndicesIn20'] = articlesWithMatches['Locations in A'].apply(paraIndicesIn20)
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_49489/4864444.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articlesWithMatches['paraIndicesIn20'] = articlesWithMatches['Locations in A'].apply(paraIndicesIn20)
counters = list(articlesWithMatches['paraIndicesIn20'].apply(Counter))
grandTally = Counter()
for counter in counters:
    grandTally += counter
del grandTally[None]
dict(grandTally)
{6: 69, 5: 47, 3: 7, 15: 4, 10: 20, 29: 2, 25: 3, 4: 6, 7: 9, 12: 3, 14: 3, 33: 6, 18: 3, 26: 8, 17: 7, 16: 7, 11: 8, 22: 1, 1: 1, 2: 1, 13: 1, 8: 1, 9: 2}
pd.Series(dict(grandTally)).sort_index().plot(kind='bar', title="Which paragraphs in Chapter 20 are quoted most often?")
<Axes: title={'center': 'Which paragraphs in Chapter 20 are quoted most often?'}>
print(mm[xxParaLocations[5]:xxParaLocations[7]]) # What are paragraphs #5 and #6?
To those who have looked at Rome with the quickening power of a knowledge which breathes a growing soul into all historic shapes, and traces out the suppressed transitions which unite all contrasts, Rome may still be the spiritual centre and interpreter of the world. But let them conceive one more historical contrast: the gigantic broken revelations of that Imperial and Papal city thrust abruptly on the notions of a girl who had been brought up in English and Swiss Puritanism, fed on meagre Protestant histories and on art chiefly of the hand-screen sort; a girl whose ardent nature turned all her small allowance of knowledge into principles, fusing her actions into their mould, and whose quick emotions gave the most abstract things the quality of a pleasure or a pain; a girl who had lately become a wife, and from the enthusiastic acceptance of untried duty found herself plunged in tumultuous preoccupation with her personal lot. The weight of unintelligible Rome might lie easily on bright nymphs to whom it formed a background for the brilliant picnic of Anglo-foreign society; but Dorothea had no such defence against deep impressions. Ruins and basilicas, palaces and colossi, set in the midst of a sordid present, where all that was living and warm-blooded seemed sunk in the deep degeneracy of a superstition divorced from reverence; the dimmer but yet eager Titanic life gazing and struggling on walls and ceilings; the long vistas of white forms whose marble eyes seemed to hold the monotonous light of an alien world: all this vast wreck of ambitious ideals, sensuous and spiritual, mixed confusedly with the signs of breathing forgetfulness and degradation, at first jarred her as with an electric shock, and then urged themselves on her with that ache belonging to a glut of confused ideas which check the flow of emotion. Forms both pale and glowing took possession of her young sense, and fixed themselves in her memory even when she was not thinking of them, preparing strange associations which remained through her after-years. Our moods are apt to bring with them images which succeed each other like the magic-lantern pictures of a doze; and in certain states of dull forlornness Dorothea all her life continued to see the vastness of St. Peter's, the huge bronze canopy, the excited intention in the attitudes and garments of the prophets and evangelists in the mosaics above, and the red drapery which was being hung for Christmas spreading itself everywhere like a disease of the retina. Not that this inward amazement of Dorothea's was anything very exceptional: many souls in their young nudity are tumbled out among incongruities and left to "find their feet" among them, while their elders go about their business. Nor can I suppose that when Mrs. Casaubon is discovered in a fit of weeping six weeks after her wedding, the situation will be regarded as tragic. Some discouragement, some faintness of heart at the new real future which replaces the imaginary, is not unusual, and we do not expect people to be deeply moved by what is not unusual. That element of tragedy which lies in the very fact of frequency, has not yet wrought itself into the coarse emotion of mankind; and perhaps our frames could hardly bear much of it. If we had a keen vision and feeling of all ordinary human life, it would be like hearing the grass grow and the squirrel's heart beat, and we should die of that roar which lies on the other side of silence. As it is, the quickest of us walk about well wadded with stupidity.
# Top 10 journals with most articles in our dataset
df['journal'] = df['isPartOf']
journalStats = df['journal'].value_counts()
print("Journal | Number of articles in our dataset")
journalStats[:10]
Journal | Number of articles in our dataset
Victorian Studies                            459
George Eliot - George Henry Lewes Studies    231
The Modern Language Review                   192
Nineteenth-Century Fiction                   192
The Review of English Studies                190
PMLA                                         154
NOVEL: A Forum on Fiction                    148
Nineteenth-Century Literature                139
Studies in the Novel                         124
ELH                                          102
Name: journal, dtype: int64
journalList = journalStats.index
journals = pd.DataFrame({title: synchronicAnalysis(df.loc[df['journal'] == title]) for title in journalList }).T
cutoff = 1500
topJournals = journals.loc[journals.sum(axis=1) > cutoff]
otherJournals = journals.loc[journals.sum(axis=1) < cutoff]
topJournals.loc['Other'] = otherJournals.sum()
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_49489/1477629281.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  topJournals.loc['Other'] = otherJournals.sum()
topJournals.T.plot(kind='bar', title="Middlemarch quotations per chapter, by journal", stacked=True, colormap='nipy_spectral')
<Axes: title={'center': 'Middlemarch quotations per chapter, by journal'}>
#ax = topJournals.T.plot(kind='bar', stacked=True, colormap='nipy_spectral')
#fig = ax.get_figure()
#fig.savefig('synchronic-journals.png', bboxinches='tight', dpi=300)
# Print the total number of journals
len(journalStats)
789
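As a quick follow-up, the same journalStats Series shows how concentrated the corpus is in its most frequent venues (a sketch; the figure itself is not reproduced here):
top10_share = journalStats[:10].sum() / journalStats.sum()
print(f"The ten most frequent journals account for {top10_share:.1%} of the articles in the dataset.")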
For the full list of journals classified as "Victorianist," see below.
Downloadable link: CSV with complete list of Victorianist-classified journals
list_of_VS_journals = ['Victorian Studies', 'George Eliot - George Henry Lewes Studies', 'Nineteenth-Century Fiction', 'Nineteenth-Century Literature', 'Dickens Studies Annual', 'Victorian Literature and Culture', 'Victorian Review', 'The George Eliot, George Henry Lewes Newsletter', 'Victorian Periodicals Review', 'Dickens Quarterly', 'Victorian Poetry', 'The Thomas Hardy Journal', 'The Gaskell Society Journal', 'The Gaskell Journal', 'Newsletter of the Victorian Studies Association of Western Canada', 'Dickens Studies Newsletter', 'Browning Institute Studies', 'Victorian Periodicals Newsletter', 'Carlyle Studies Annual', 'Conradiana', 'Tennyson Research Bulletin', 'The Conradian', 'The Hardy Society Journal', 'The Hardy Review', 'Studies in Browning and His Circle', 'Nineteenth-Century French Studies', 'The Wilkie Collins Journal', 'Carlyle Newsletter', 'The Wildean', 'Dickens Studies', 'Carlyle Annual', '19th-Century Music', 'The Trollopian', 'Conrad Studies']
print("LIST OF VICTORIANIST JOURNALS \n")
for item in list_of_VS_journals:
    print(item)
LIST OF VICTORIANIST JOURNALS

Victorian Studies
George Eliot - George Henry Lewes Studies
Nineteenth-Century Fiction
Nineteenth-Century Literature
Dickens Studies Annual
Victorian Literature and Culture
Victorian Review
The George Eliot, George Henry Lewes Newsletter
Victorian Periodicals Review
Dickens Quarterly
Victorian Poetry
The Thomas Hardy Journal
The Gaskell Society Journal
The Gaskell Journal
Newsletter of the Victorian Studies Association of Western Canada
Dickens Studies Newsletter
Browning Institute Studies
Victorian Periodicals Newsletter
Carlyle Studies Annual
Conradiana
Tennyson Research Bulletin
The Conradian
The Hardy Society Journal
The Hardy Review
Studies in Browning and His Circle
Nineteenth-Century French Studies
The Wilkie Collins Journal
Carlyle Newsletter
The Wildean
Dickens Studies
Carlyle Annual
19th-Century Music
The Trollopian
Conrad Studies
len(list_of_VS_journals)
34
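The downloadable CSV mentioned above can be regenerated directly from this list; a minimal sketch (the output filename is hypothetical):
pd.DataFrame({'journal': list_of_VS_journals}).to_csv('victorianist-journals.csv', index=False)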
just_VS_journals_quotes = articlesWithMatches[articlesWithMatches['isPartOf'].isin(list_of_VS_journals)]
# Number of Victorianist articles containing matches
len(just_VS_journals_quotes)
429
just_VS_journals_quotes['isPartOf'].value_counts()
George Eliot - George Henry Lewes Studies                             106
Victorian Studies                                                      78
Nineteenth-Century Fiction                                             68
Nineteenth-Century Literature                                          37
Victorian Literature and Culture                                       37
Dickens Studies Annual                                                 19
Victorian Review                                                       13
Victorian Poetry                                                       12
The George Eliot, George Henry Lewes Newsletter                        11
Victorian Periodicals Review                                            8
Dickens Quarterly                                                       5
The Thomas Hardy Journal                                                5
The Gaskell Society Journal                                             4
Browning Institute Studies                                              4
Tennyson Research Bulletin                                              4
Carlyle Studies Annual                                                  3
The Gaskell Journal                                                     2
Conradiana                                                              2
Dickens Studies Newsletter                                              2
19th-Century Music                                                      1
Newsletter of the Victorian Studies Association of Western Canada      1
Conrad Studies                                                          1
Nineteenth-Century French Studies                                       1
The Wilkie Collins Journal                                              1
Carlyle Annual                                                          1
The Hardy Review                                                        1
Victorian Periodicals Newsletter                                        1
The Wildean                                                             1
Name: isPartOf, dtype: int64
articlesWithMatches['isPartOf'].value_counts()
George Eliot - George Henry Lewes Studies    106
Victorian Studies                             78
Nineteenth-Century Fiction                    68
PMLA                                          47
ELH                                           42
                                            ...
Science                                        1
Transformation of Rage                         1
Anglican and Episcopal History                 1
The Journal of Ethics                          1
Sociological Forum                             1
Name: isPartOf, Length: 403, dtype: int64
# Number of matches from Victorianist journals
just_VS_journals_quotes['numMatches'].sum()
1258
# Number of matches from the journal *Victorian Studies*
just_VS_journals_quotes[just_VS_journals_quotes['isPartOf']== "Victorian Studies"].numMatches.sum()
150
# Number of matches, overall
articlesWithMatches['numMatches'].sum()
3800
# What proportion of matches come from Victorianist journals?
just_VS_journals_quotes['numMatches'].sum() / articlesWithMatches['numMatches'].sum()
0.3310526315789474
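For reporting, the three quantities just computed can be summarized in a single line (a brief sketch):
vs_matches = just_VS_journals_quotes['numMatches'].sum()
all_matches = articlesWithMatches['numMatches'].sum()
print(f"{vs_matches} of {all_matches} matches ({vs_matches / all_matches:.1%}) come from Victorianist journals.")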
geJournals = df.loc[df['journal'] == 'George Eliot - George Henry Lewes Studies']
pd.set_option('display.max_columns', 207)
geJournals
creator | datePublished | docSubType | docType | id | identifier | isPartOf | issueNumber | language | outputFormat | pageCount | pageEnd | pageStart | pagination | provider | publicationYear | publisher | sourceCategory | tdmCategory | title | url | volumeNumber | wordCount | numMatches | Locations in A | Locations in B | abstract | keyphrase | subTitle | year | Decade | Quoted Words | Locations in A with Wordcounts | Wordcounts | journal | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
37 | [ELIZABETH WINSTON] | 1995-09-01 | book-review | article | http://www.jstor.org/stable/43595523 | [{'name': 'issn', 'value': '23721901'}, {'name... | George Eliot - George Henry Lewes Studies | 28/29 | [eng] | [unigram, bigram, trigram] | 6.0 | 106 | 101 | pp. 101-106 | jstor | 1995 | Penn State University Press | [Language & Literature, Humanities] | [Arts - Literature] | Review Article | http://www.jstor.org/stable/43595523 | None | 1981 | 0 | [] | [] | None | None | None | 1995 | 1990 | 0 | [] | [] | George Eliot - George Henry Lewes Studies |
76 | [Katherine Newey] | 2011-09-01 | research-article | article | http://www.jstor.org/stable/42827892 | [{'name': 'issn', 'value': '23721901'}, {'name... | George Eliot - George Henry Lewes Studies | 60/61 | [eng] | [unigram, bigram, trigram] | 16.0 | 141 | 126 | pp. 126-141 | jstor | 2011 | Penn State University Press | [Language & Literature, Humanities] | [Arts - Literature] | The "British Matron" and the Poetic Drama: The... | http://www.jstor.org/stable/42827892 | None | 7038 | 1 | [[502448, 502471]] | [[18540, 18563]] | None | None | None | 2011 | 2010 | 4 | [([502448, 502471], 4)] | [4] | George Eliot - George Henry Lewes Studies |
101 | None | 2015-11-01 | other | article | http://www.jstor.org/stable/10.5325/georeliogh... | [{'name': 'issn', 'value': '23721901'}, {'name... | George Eliot - George Henry Lewes Studies | 2 | [eng] | [unigram, bigram, trigram] | 2.0 | ii | i | pp. i-ii | jstor | 2015 | Penn State University Press | [Language & Literature, Humanities] | None | Front Matter | http://www.jstor.org/stable/10.5325/georeliogh... | 67 | 438 | 0 | [] | [] | None | None | None | 2015 | 2010 | 0 | [] | [] | George Eliot - George Henry Lewes Studies |
107 | [AVROM FLEISHMAN] | 2008-09-01 | research-article | article | http://www.jstor.org/stable/42827960 | [{'name': 'issn', 'value': '23721901'}, {'name... | George Eliot - George Henry Lewes Studies | 54/55 | [eng] | [unigram, bigram, trigram] | 79.0 | 76 | 1 | pp. 1-76 | jstor | 2008 | Penn State University Press | [Language & Literature, Humanities] | [Arts - Performing arts] | GEORGE ELIOT'S READING: A CHRONOLOGICAL LIST | http://www.jstor.org/stable/42827960 | None | 22729 | 1 | [[1138948, 1138968]] | [[73073, 73093]] | None | None | None | 2008 | 2000 | 4 | [([1138948, 1138968], 4)] | [4] | George Eliot - George Henry Lewes Studies |
108 | [Judith Adler] | 2018-10-01 | research-article | article | http://www.jstor.org/stable/10.5325/georeliogh... | [{'name': 'issn', 'value': '23721901'}, {'name... | George Eliot - George Henry Lewes Studies | 2 | [eng] | [unigram, bigram, trigram] | 29.0 | 171 | 143 | pp. 143-171 | jstor | 2018 | Penn State University Press | [Language & Literature, Humanities] | [Arts - Literature] | Hidden Allusion in the Finale of <em>Middlemar... | http://www.jstor.org/stable/10.5325/georeliogh... | 70 | 9258 | 1 | [[1792915, 1793447]] | [[350, 876]] | This article argues that the famous concluding... | None | None | 2018 | 2010 | 97 | [([1792915, 1793447], 97)] | [97] | George Eliot - George Henry Lewes Studies |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5798 | [TERENCE R. WRIGHT] | 1995-09-01 | book-review | article | http://www.jstor.org/stable/43595524 | [{'name': 'issn', 'value': '23721901'}, {'name... | George Eliot - George Henry Lewes Studies | 28/29 | [eng] | [unigram, bigram, trigram] | 3.0 | 109 | 107 | pp. 107-109 | jstor | 1995 | Penn State University Press | [Language & Literature, Humanities] | None | Review Article | http://www.jstor.org/stable/43595524 | None | 862 | 0 | [] | [] | None | None | None | 1995 | 1990 | 0 | [] | [] | George Eliot - George Henry Lewes Studies |
5835 | [SALEEL NURBHAI] | 1997-09-01 | research-article | article | http://www.jstor.org/stable/42827636 | [{'name': 'issn', 'value': '23721901'}, {'name... | George Eliot - George Henry Lewes Studies | 32/33 | [eng] | [unigram, bigram, trigram] | 18.0 | 18 | 1 | pp. 1-18 | jstor | 1997 | Penn State University Press | [Language & Literature, Humanities] | [Arts - Literature] | JEWISH MYTH IN GEORGE ELIOT'S FICTION | http://www.jstor.org/stable/42827636 | None | 6755 | 5 | [[190333, 190518], [939772, 940069], [940403, ... | [[30280, 30465], [30822, 31114], [31125, 31327... | None | None | None | 1997 | 1990 | 161 | [([190333, 190518], 36), ([939772, 940069], 59... | [36, 59, 39, 9, 18] | George Eliot - George Henry Lewes Studies |
5853 | [DONALD HAWES] | 2001-09-01 | research-article | article | http://www.jstor.org/stable/42827734 | [{'name': 'issn', 'value': '23721901'}, {'name... | George Eliot - George Henry Lewes Studies | 40/41 | [eng] | [unigram, bigram, trigram] | 8.0 | 75 | 68 | pp. 68-75 | jstor | 2001 | Penn State University Press | [Language & Literature, Humanities] | [Arts - Literature] | GEORGE ELIOT AND GEORGE HENRY LEWES: SELECTED ... | http://www.jstor.org/stable/42827734 | None | 2902 | 1 | [[1316376, 1316406]] | [[3676, 3706]] | None | None | None | 2001 | 2000 | 5 | [([1316376, 1316406], 5)] | [5] | George Eliot - George Henry Lewes Studies |
5865 | None | 1995-09-01 | misc | article | http://www.jstor.org/stable/43595525 | [{'name': 'issn', 'value': '23721901'}, {'name... | George Eliot - George Henry Lewes Studies | 28/29 | [eng] | [unigram, bigram, trigram] | 5.0 | None | None | None | jstor | 1995 | Penn State University Press | [Language & Literature, Humanities] | [Arts - Literature] | Back Matter | http://www.jstor.org/stable/43595525 | None | 1147 | 0 | [] | [] | None | None | None | 1995 | 1990 | 0 | [] | [] | George Eliot - George Henry Lewes Studies |
5876 | [BUFF LINDAU] | 2013-10-01 | research-article | article | http://www.jstor.org/stable/42827928 | [{'name': 'issn', 'value': '23721901'}, {'name... | George Eliot - George Henry Lewes Studies | 64/65 | [eng] | [unigram, bigram, trigram] | 1.0 | 109 | 109 | p. 109 | jstor | 2013 | Penn State University Press | [Language & Literature, Humanities] | None | A GEORGE ELIOT NOTE | http://www.jstor.org/stable/42827928 | None | 129 | 0 | [] | [] | None | None | None | 2013 | 2010 | 0 | [] | [] | George Eliot - George Henry Lewes Studies |
231 rows × 35 columns
print(geJournals.title)
37                                          Review Article
76      The "British Matron" and the Poetic Drama: The...
101                                           Front Matter
107           GEORGE ELIOT'S READING: A CHRONOLOGICAL LIST
108     Hidden Allusion in the Finale of <em>Middlemar...
                               ...
5798                                        Review Article
5835                 JEWISH MYTH IN GEORGE ELIOT'S FICTION
5853    GEORGE ELIOT AND GEORGE HENRY LEWES: SELECTED ...
5865                                           Back Matter
5876                                   A GEORGE ELIOT NOTE
Name: title, Length: 231, dtype: object
print("Number of articles where journal title is 'George ELiot - George Henry Lewes Studies':")
len(geJournals)
Number of articles where journal title is 'George Eliot - George Henry Lewes Studies':
231
plotSynchronicAnalysis(synchronicAnalysis(geJournals, useWordcounts=False), useWordcounts=False)
# Weighted by word count (the number of words in the quotation) and normalized by decade (counts are scaled to the maximum value per decade)
GEGHLSbooksDiaDF = diachronicAnalysis(geJournals, decades=(1990, 2020), bins=bookLocations, useWordcounts=True, normalize=True).sort_index()
print('GE-GHLS Quotations per book, per decade (weighted by length of quotation and normalized by decade):')
GEGHLSbooksDiaDF
GE-GHLS Quotations per book, per decade (weighted by length of quotation and normalized by decade):
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | |
---|---|---|---|---|---|---|---|---|---|
1990 | 0.0 | 1.000000 | 0.658422 | 0.133021 | 0.121658 | 0.098930 | 0.116310 | 0.097594 | 0.231283 |
2000 | 0.0 | 0.832359 | 0.805068 | 0.736842 | 1.000000 | 0.508772 | 0.666667 | 0.249513 | 0.586745 |
2010 | 0.0 | 1.000000 | 0.613139 | 0.811436 | 0.364964 | 0.004866 | 0.074209 | 0.209246 | 0.542579 |
GEGHLSbooksDiaDF['decade'] = GEGHLSbooksDiaDF.index
GEGHLSbooksMelted = GEGHLSbooksDiaDF.melt(id_vars='decade', var_name='book')
# Cut out erroneous "book 0" material (i.e., the title page)
GEGHLSbooksMelted = GEGHLSbooksMelted[GEGHLSbooksMelted.book != 0]
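For reference, "normalized by decade" means that each decade's weighted counts are scaled so that the most-quoted book in that decade equals 1.0, as the rows of the table above show. A sketch that reproduces the scaling from the un-normalized weighted counts, assuming diachronicAnalysis applies exactly this per-decade max-scaling:
rawGEGHLSbooks = diachronicAnalysis(geJournals, decades=(1990, 2020), bins=bookLocations,
                                    useWordcounts=True, normalize=False).sort_index()
# Dividing each decade's row by its maximum should recover the normalized table above.
rawGEGHLSbooks.div(rawGEGHLSbooks.max(axis=1), axis=0)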
# Raw quotation counts (not weighted by the number of words in the quotation or normalized by decade).
# Setting useWordcounts=True weights by word count; setting it to False uses raw numbers of quotations.
GEGHLSbooksNotNormalizedNotWeightedDiaDF = diachronicAnalysis(geJournals, decades=(1960, 2020), bins=bookLocations,\
useWordcounts=False, normalize=False).sort_index()
print('Number of quotations per book, per decade in GE-GHLS')
GEGHLSbooksNotNormalizedNotWeightedDiaDF
Number of quotations per book, per decade in GE-GHLS
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | |
---|---|---|---|---|---|---|---|---|---|
1990 | 0 | 60 | 22 | 10 | 4 | 6 | 5 | 3 | 12 |
2000 | 0 | 16 | 19 | 11 | 11 | 9 | 12 | 3 | 9 |
2010 | 0 | 34 | 21 | 17 | 7 | 1 | 4 | 4 | 16 |
GEGHLSbooksNotNormalizedNotWeightedDiaDF['decade'] = GEGHLSbooksNotNormalizedNotWeightedDiaDF.index
GEGHLSbooksNotNormalizedNotWeightedDiaDFMelted = GEGHLSbooksNotNormalizedNotWeightedDiaDF.melt(id_vars='decade', var_name='book')
# Cut out erroneous "book 0" material (i.e., the title page)
GEGHLSbooksNotNormalizedNotWeightedDiaDFMelted = GEGHLSbooksNotNormalizedNotWeightedDiaDFMelted[GEGHLSbooksNotNormalizedNotWeightedDiaDFMelted.book != 0]
# Raw quotation counts (not weighted by the number of words in the quotation or normalized by decade).
# Setting useWordcounts=True weights by word count; setting it to False uses raw numbers of quotations.
GEGHLSdiaDFquoteOnly = diachronicAnalysis(geJournals, decades=(1960, 2020), bins=chapterLocations, useWordcounts=False, normalize=False).sort_index()
GEGHLSdiaDFquoteOnly.columns.name ='chapter'
GEGHLSdiaDFquoteOnly.index.name = 'decade'
GEGHLSdiaDFquoteOnly
chapter | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
decade | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
1990 | 1 | 5 | 2 | 6 | 1 | 5 | 7 | 3 | 1 | 2 | 10 | 10 | 7 | 2 | 1 | 6 | 0 | 0 | 0 | 2 | 8 | 2 | 1 | 0 | 1 | 2 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 3 | 0 | 1 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 2 | 0 | 0 | 2 | 4 |
2000 | 2 | 1 | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 3 | 4 | 0 | 0 | 0 | 1 | 9 | 3 | 0 | 1 | 2 | 3 | 0 | 0 | 0 | 2 | 2 | 0 | 1 | 1 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 1 | 1 | 0 | 6 | 1 | 1 | 1 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 3 | 0 | 1 | 3 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 2 | 0 | 0 | 0 | 4 |
2010 | 8 | 6 | 3 | 1 | 0 | 1 | 4 | 1 | 1 | 4 | 2 | 0 | 3 | 0 | 0 | 4 | 0 | 1 | 0 | 9 | 3 | 2 | 2 | 4 | 9 | 1 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 1 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 13 |
GEGHLSdiaDFquoteOnly['decade'] = GEGHLSdiaDFquoteOnly.index
GEGHLSdiaDFquoteOnlyMelted = GEGHLSdiaDFquoteOnly.melt(id_vars='decade')
alt.Chart(GEGHLSdiaDFquoteOnlyMelted, title="GE-GHLS Middlemarch quotations per chapter, per decade (not weighted or normalized)")\
.mark_rect().encode(x=alt.X('chapter', title="Chapter", type='ordinal', axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Number of Quotations")))\
.properties(width=1000, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
# Weighted by word count (the number of words in the quotation) and normalized by decade (counts are scaled to the maximum value per decade)
GEGHLSnormalizeddiaDF = diachronicAnalysis(geJournals, decades=(1960, 2020), bins=chapterLocations, useWordcounts=True, normalize=True).sort_index()
GEGHLSnormalizeddiaDF.columns.name = 'chapter'
GEGHLSnormalizeddiaDF.index.name = 'decade'
GEGHLSnormalizeddiaDF
chapter | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
decade | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
1990 | 0.017699 | 0.353982 | 0.042035 | 0.258850 | 0.015487 | 0.329646 | 0.320796 | 0.196903 | 0.070796 | 0.159292 | 0.358407 | 0.482301 | 0.703540 | 0.050885 | 0.057522 | 0.599558 | 0.000000 | 0.000000 | 0.000000 | 0.387168 | 1.000000 | 0.066372 | 0.017699 | 0.000000 | 0.108407 | 0.057522 | 0.0 | 0.050885 | 0.011062 | 0.026549 | 0.000000 | 0.0 | 0.0 | 0.185841 | 0.0 | 0.039823 | 0.000000 | 0.362832 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.037611 | 0.000000 | 0.276549 | 0.013274 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.081858 | 0.008850 | 0.000000 | 0.0 | 0.000000 | 0.294248 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.323009 | 0.000000 | 0.0 | 0.066372 | 0.000000 | 0.011062 | 0.0 | 0.0 | 0.227876 | 0.000000 | 0.000000 | 0.0 | 0.128319 | 0.0 | 0.0 | 0.042035 | 0.289823 |
2000 | 0.091575 | 0.161172 | 0.139194 | 0.238095 | 0.032967 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.457875 | 0.443223 | 0.000000 | 0.000000 | 0.000000 | 0.043956 | 1.000000 | 0.194139 | 0.000000 | 0.018315 | 0.080586 | 0.175824 | 0.000000 | 0.000000 | 0.000000 | 0.076923 | 0.395604 | 0.0 | 0.018315 | 0.468864 | 0.000000 | 0.424908 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.703297 | 0.106227 | 0.146520 | 0.018315 | 0.0 | 0.904762 | 0.223443 | 0.014652 | 0.219780 | 0.000000 | 0.0 | 0.131868 | 0.311355 | 0.054945 | 0.0 | 0.0 | 0.000000 | 0.164835 | 0.000000 | 0.201465 | 0.238095 | 0.000000 | 0.0 | 0.000000 | 0.402930 | 0.245421 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.413919 | 0.029304 | 0.0 | 0.025641 | 0.000000 | 0.0 | 0.000000 | 0.205128 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.179487 | 0.0 | 0.245421 | 0.0 | 0.0 | 0.000000 | 0.472527 |
2010 | 0.288939 | 0.349887 | 0.273138 | 0.045147 | 0.000000 | 0.018059 | 0.275395 | 0.079007 | 0.049661 | 0.108352 | 0.139955 | 0.000000 | 0.227991 | 0.000000 | 0.000000 | 0.142212 | 0.000000 | 0.011287 | 0.000000 | 0.564334 | 0.158014 | 0.160271 | 0.101580 | 0.081264 | 1.000000 | 0.009029 | 0.0 | 0.000000 | 0.000000 | 0.415350 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.178330 | 0.045147 | 0.000000 | 0.000000 | 0.173815 | 0.085779 | 0.0 | 0.194131 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.009029 | 0.000000 | 0.058691 | 0.054176 | 0.000000 | 0.013544 | 0.0 | 0.011287 | 0.000000 | 0.000000 | 0.388262 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.038375 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.108352 | 0.000000 | 0.0 | 0.018059 | 0.0 | 0.0 | 0.000000 | 0.841986 |
GEGHLSnormalizeddiaDF['decade'] = GEGHLSnormalizeddiaDF.index
GEGHLSnormalizeddiaMelted = GEGHLSnormalizeddiaDF.melt(id_vars='decade')
Because our corpus contains varying numbers of JSTOR texts per decade, we've also chosen to weight by the length of each quotation and to normalize per decade.
alt.Chart(GEGHLSnormalizeddiaMelted, title="GE-GHLS Middlemarch quotations per chapter, per decade (normalized by decade and weighted by word count)")\
.mark_rect().encode(x=alt.X('chapter', title="Chapter", type='ordinal', axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Amount Quoted")))\
.properties(width=1000, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
geJournals = df.loc[df['journal'] == 'George Eliot - George Henry Lewes Studies']
otherJournals = df.loc[df['journal'] != 'George Eliot - George Henry Lewes Studies']
# Normalize each group's per-chapter counts by that group's own maximum,
# then plot the difference as a per-chapter "specialization index"
geDF = synchronicAnalysis(geJournals)
otherDF = synchronicAnalysis(otherJournals)
normGE = geDF.div(geDF.max())
normOther = otherDF.div(otherDF.max())
ax = (normGE - normOther).plot(kind='bar')
ax.set_xlabel('Chapter')
ax.set_ylabel('Specialization Index')
# Save a big version for publication.
#ax.figure.savefig('specialization.png', bbox_inches='tight', dpi=300)
Text(0, 0.5, 'Specialization Index')
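As a follow-up sketch (assuming, as in the cell above, that synchronicAnalysis returns per-chapter quotation counts as a pandas Series), the normalized difference can also be ranked to list the chapters that GE-GHLS quotes most heavily relative to the other journals.
# Rank chapters by specialization index (positive values: quoted relatively more in GE-GHLS).
# Assumes normGE and normOther from the cell above are Series indexed by chapter.
specialization = (normGE - normOther).sort_values(ascending=False)
specialization.head(10)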
vsJournals = df.loc[df['journal'] == 'Victorian Studies']
pd.set_option('display.max_rows', 300)
vsJournals
creator | datePublished | docSubType | docType | id | identifier | isPartOf | issueNumber | language | outputFormat | pageCount | pageEnd | pageStart | pagination | provider | publicationYear | publisher | sourceCategory | tdmCategory | title | url | volumeNumber | wordCount | numMatches | Locations in A | Locations in B | abstract | keyphrase | subTitle | year | Decade | Quoted Words | Locations in A with Wordcounts | Wordcounts | journal | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
14 | [Catherine Gallagher] | 2006-10-01 | book-review | article | http://www.jstor.org/stable/4618956 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 1 | [eng] | [unigram, bigram, trigram] | 3.0 | 111 | 109 | pp. 109-111 | jstor | 2006 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature] | Review Article | http://www.jstor.org/stable/4618956 | 49 | 1378 | 0 | [] | [] | None | None | None | 2006 | 2000 | 0 | [] | [] | Victorian Studies |
16 | [Angelique Richardson] | 2006-07-01 | book-review | article | http://www.jstor.org/stable/4618943 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 4 | [eng] | [unigram, bigram, trigram] | 4.0 | 769 | 766 | pp. 766-769 | jstor | 2006 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature] | Review Article | http://www.jstor.org/stable/4618943 | 48 | 1482 | 0 | [] | [] | None | None | None | 2006 | 2000 | 0 | [] | [] | Victorian Studies |
31 | None | 1960-03-01 | misc | article | http://www.jstor.org/stable/3825496 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 3 | [eng] | [unigram, bigram, trigram] | 6.0 | 316 | 297 | pp. 297-316 | jstor | 1960 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature, Arts - Performing arts] | Front Matter | http://www.jstor.org/stable/3825496 | 3 | 1043 | 0 | [] | [] | None | None | None | 1960 | 1960 | 0 | [] | [] | Victorian Studies |
39 | [A. S. Crehan] | 1976-03-01 | research-article | article | http://www.jstor.org/stable/3826133 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 3 | [eng] | [unigram, bigram, trigram] | 31.0 | 409 | 379 | pp. 379-409 | jstor | 1976 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature] | Victorian Literature: Materials for Teaching a... | http://www.jstor.org/stable/3826133 | 19 | 13133 | 0 | [] | [] | None | None | None | 1976 | 1970 | 0 | [] | [] | Victorian Studies |
41 | [Ronald E. Freeman] | 1968-06-01 | misc | article | http://www.jstor.org/stable/3825239 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 4 | [eng] | [unigram, bigram, trigram] | 60.0 | 614 | 555 | pp. 555-614 | jstor | 1968 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Performing arts] | Victorian Bibliography for 1967 | http://www.jstor.org/stable/3825239 | 11 | 36967 | 0 | [] | [] | None | None | None | 1968 | 1960 | 0 | [] | [] | Victorian Studies |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5872 | [Chris R. Vanden Bossche] | 1987-07-01 | book-review | article | http://www.jstor.org/stable/3828188 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 4 | [eng] | [unigram, bigram, trigram] | 2.0 | 556 | 555 | pp. 555-556 | jstor | 1987 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature, Philosophy - Applied philo... | Review Article | http://www.jstor.org/stable/3828188 | 30 | 1676 | 0 | [] | [] | None | None | None | 1987 | 1980 | 0 | [] | [] | Victorian Studies |
5873 | [Anita Levy] | 1996-04-01 | book-review | article | http://www.jstor.org/stable/3829465 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 3 | [eng] | [unigram, bigram, trigram] | 3.0 | 431 | 429 | pp. 429-431 | jstor | 1996 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature] | Review Article | http://www.jstor.org/stable/3829465 | 39 | 1588 | 0 | [] | [] | None | None | None | 1996 | 1990 | 0 | [] | [] | Victorian Studies |
5874 | None | 1990-07-01 | misc | article | http://www.jstor.org/stable/3827789 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 4 | [eng] | [unigram, bigram, trigram] | 10.0 | 826 | 817 | pp. 817-826 | jstor | 1990 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature] | Volume Information | http://www.jstor.org/stable/3827789 | 33 | 4595 | 0 | [] | [] | None | None | None | 1990 | 1990 | 0 | [] | [] | Victorian Studies |
5875 | None | 2011-10-01 | misc | article | http://www.jstor.org/stable/10.2979/victorians... | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 1 | [eng] | [unigram, bigram, trigram] | 7.0 | 190 | 185 | pp. 185-190 | jstor | 2011 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature] | Contributors | http://www.jstor.org/stable/10.2979/victorians... | 54 | 2413 | 0 | [] | [] | None | None | None | 2011 | 2010 | 0 | [] | [] | Victorian Studies |
5882 | [Edward H. Cohen] | 1990-07-01 | misc | article | http://www.jstor.org/stable/3827815 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 4 | [eng] | [unigram, bigram, trigram] | 132.0 | 812 | 681 | pp. 681-812 | jstor | 1990 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Performing arts, Philosophy - Applied ... | Victorian Bibliography for 1989 | http://www.jstor.org/stable/3827815 | 33 | 81819 | 0 | [] | [] | None | None | None | 1990 | 1990 | 0 | [] | [] | Victorian Studies |
459 rows × 35 columns
print("Number of articles where journal title is 'Victorian Studies':")
len(vsJournals)
Number of articles where journal title is 'Victorian Studies':
459
plotSynchronicAnalysis(synchronicAnalysis(vsJournals, useWordcounts=False), useWordcounts=False)
# Weighted by wordcount (the number of words in the quotation) and normalized by decade (counts are scaled to the maximum value per decade)
VSbooksDiaDF = diachronicAnalysis(vsJournals, decades=(1960, 2020), bins=bookLocations, useWordcounts=True, normalize=True).sort_index()
print('Victorian Studies Quotations per book, per decade (weighted by length of quotation and normalized by decade):')
VSbooksDiaDF
Victorian Studies Quotations per book, per decade (weighted by length of quotation and normalized by decade):
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | |
---|---|---|---|---|---|---|---|---|---|
1960 | 0.0 | 1.000000 | 0.067227 | 0.336134 | 0.000000 | 0.000000 | 0.218487 | 0.000000 | 0.151261 |
1970 | 0.0 | 0.121107 | 0.820069 | 0.000000 | 0.688581 | 0.349481 | 0.480969 | 1.000000 | 0.318339 |
1980 | 0.0 | 1.000000 | 0.486111 | 0.000000 | 0.000000 | 0.000000 | 0.250000 | 0.000000 | 0.000000 |
1990 | 0.0 | 0.037383 | 0.607477 | 0.000000 | 0.000000 | 0.056075 | 0.000000 | 0.224299 | 1.000000 |
2000 | 0.0 | 0.035264 | 1.000000 | 0.012594 | 0.035264 | 0.000000 | 0.000000 | 0.000000 | 0.166247 |
2010 | 0.0 | 0.513514 | 1.000000 | 0.221622 | 0.591892 | 0.032432 | 0.351351 | 0.016216 | 0.589189 |
VSbooksDiaDF['decade'] = VSbooksDiaDF.index
VSbooksMelted = VSbooksDiaDF.melt(id_vars='decade', var_name='book')
# Cut out the erroneous "book 0" bin (i.e., title-page material)
VSbooksMelted = VSbooksMelted[VSbooksMelted.book != 0]
alt.Chart(VSbooksMelted, title="*Victorian Studies* Middlemarch quotations per book, per decade (normalized by decade and weighted by word count)")\
.mark_rect().encode(x=alt.X('book', title="Book", type='ordinal', axis=alt.Axis(labelAngle=0)),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Amount Quoted")))\
.properties(width=500, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
# Raw quotation counts (not weighted by the number of words in the quotation or normalized by decade)
# Setting useWordcounts=True weights each match by its wordcount; setting it to False counts raw numbers of quotations.
VSbooksNotNormalizedNotWeightedDiaDF = diachronicAnalysis(vsJournals, decades=(1960, 2020), bins=bookLocations,\
useWordcounts=False, normalize=False).sort_index()
print('Number of quotations per book, per decade in VS')
VSbooksNotNormalizedNotWeightedDiaDF
Number of quotations per book, per decade in VS
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | |
---|---|---|---|---|---|---|---|---|---|
1960 | 0 | 3 | 1 | 2 | 0 | 0 | 1 | 0 | 1 |
1970 | 0 | 3 | 6 | 0 | 5 | 5 | 4 | 1 | 7 |
1980 | 0 | 4 | 1 | 0 | 0 | 0 | 2 | 0 | 0 |
1990 | 0 | 1 | 4 | 0 | 0 | 1 | 0 | 3 | 3 |
2000 | 0 | 2 | 10 | 1 | 2 | 0 | 0 | 0 | 6 |
2010 | 0 | 9 | 12 | 3 | 3 | 2 | 6 | 1 | 12 |
VSbooksNotNormalizedNotWeightedDiaDF['decade'] = VSbooksNotNormalizedNotWeightedDiaDF.index
VSbooksNotNormalizedNotWeightedDiaDFMelted = VSbooksNotNormalizedNotWeightedDiaDF.melt(id_vars='decade', var_name='book')
# Cut out the erroneous "book 0" bin (i.e., title-page material)
VSbooksNotNormalizedNotWeightedDiaDFMelted = VSbooksNotNormalizedNotWeightedDiaDFMelted[VSbooksNotNormalizedNotWeightedDiaDFMelted.book != 0]
alt.Chart(VSbooksNotNormalizedNotWeightedDiaDFMelted, title="*Victorian Studies* Middlemarch quotations per book, per decade (not weighted or normalized by decade)")\
.mark_rect().encode(x=alt.X('book', title="Book", type='ordinal', axis=alt.Axis(labelAngle=0)),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Number of Quotations")))\
.properties(width=500, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
# Raw quotation counts (not weighted by the number of words in the quotation or normalized by decade)
# Setting useWordcounts=True weights each match by its wordcount; setting it to False counts raw numbers of quotations.
VSdiaDFquoteOnly = diachronicAnalysis(vsJournals, decades=(1960, 2030), bins=chapterLocations, useWordcounts=False, normalize=False).sort_index()
VSdiaDFquoteOnly.columns.name = 'chapter'
VSdiaDFquoteOnly.index.name = 'decade'
VSdiaDFquoteOnly
chapter | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
decade | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
1960 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1970 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 3 |
1980 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1990 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
2000 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2 | 6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 1 |
2010 | 1 | 1 | 2 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 1 | 1 | 0 | 4 | 0 | 4 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 3 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 3 |
VSdiaDFquoteOnly['decade'] = VSdiaDFquoteOnly.index
VSdiaDFquoteOnlyMelted = VSdiaDFquoteOnly.melt(id_vars='decade')
alt.Chart(VSdiaDFquoteOnlyMelted, title="*Victorian Studies* Middlemarch quotations per chapter, per decade (not weighted or normalized)")\
.mark_rect().encode(x=alt.X('chapter', title="Chapter", type='ordinal', axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Number of Quotations")))\
.properties(width=1000, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
# Weighted by wordcount (the number of words in the quotation) and normalized by decade (counts are scaled to the maximum value per decade)
VSnormalizeddiaDF = diachronicAnalysis(vsJournals, decades=(1960, 2020), bins=chapterLocations, useWordcounts=True, normalize=True).sort_index()
VSnormalizeddiaDF.columns.name = 'chapter'
VSnormalizeddiaDF.index.name = 'decade'
VSnormalizeddiaDF
chapter | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
decade | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
1960 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.134328 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.641791 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.119403 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.253731 | 0.0 | 0.0 | 0.0 | 0.343284 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.00 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.38806 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.268657 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.000000 |
1970 | 0.000000 | 0.044983 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.076125 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.020761 | 0.283737 | 0.00000 | 0.024221 | 0.491349 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.065744 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.622837 | 0.0 | 0.0 | 0.0 | 0.349481 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.00 | 0.0 | 0.480969 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.000000 | 1.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.117647 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.058824 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.141869 |
1980 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.486111 | 0.0 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.25 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.000000 |
1990 | 0.042553 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.053191 | 0.0 | 0.0 | 0.212766 | 0.425532 | 0.0 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.063830 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.00 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.212766 | 0.0 | 0.0 | 0.0 | 0.042553 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.053191 | 1.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.085106 |
2000 | 0.000000 | 0.000000 | 0.000000 | 0.034749 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.019305 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.027027 | 0.000000 | 0.0 | 0.000000 | 0.455598 | 1.00000 | 0.050193 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.019305 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.027027 | 0.0 | 0.0 | 0.027027 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.00 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.046332 | 0.000000 | 0.065637 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.03861 | 0.0 | 0.0 | 0.0 | 0.104247 |
2010 | 0.029586 | 0.029586 | 0.715976 | 0.000000 | 0.130178 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.106509 | 0.112426 | 0.000000 | 0.0 | 0.0 | 0.266272 | 0.579882 | 0.0 | 0.360947 | 0.000000 | 0.83432 | 0.000000 | 0.147929 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.053254 | 0.0 | 0.431953 | 0.0 | 0.0 | 0.295858 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 1.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.047337 | 0.0 | 0.0 | 0.023669 | 0.0 | 0.0 | 0.00 | 0.0 | 0.000000 | 0.0 | 0.769231 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.035503 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.094675 | 0.0 | 0.0 | 0.000000 | 0.266272 | 0.000000 | 0.0 | 0.568047 | 0.000000 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.360947 |
VSnormalizeddiaDF['decade'] = VSnormalizeddiaDF.index
VSnormalizeddiaMelted = VSnormalizeddiaDF.melt(id_vars='decade')
Because our corpus contains varying numbers of JSTOR texts per decade, we've decided to also weight by length of quotation and normalize per decade.
alt.Chart(VSnormalizeddiaMelted, title="*Victorian Studies* Middlemarch quotations per chapter, per decade (normalized by decade and weighted by word count)")\
.mark_rect().encode(x=alt.X('chapter', title="Chapter", type='ordinal', axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Amount Quoted")))\
.properties(width=1000, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
# Try to find out why Chapter 15 looms so large in the 1980s and 1990s.
chap15sVS = []
idsVS = []
for i, row in vsJournals.iterrows():
    locations = row['Locations in A']
    starts = [item[0] for item in locations]
    if row['Decade'] in [1960, 1970, 1980, 1990, 2000, 2010, 2020]:  # all decades in the corpus
        for start in starts:
            if start > 290371 and start < 322052:  # Does it cite Chapter XV?
                if row.id not in idsVS:  # avoid adding the same article twice
                    chap15sVS.append(row)
                    idsVS.append(row.id)
print('Titles of articles that quote Chapter 15:')
[item.title for item in chap15sVS]
Titles of articles that quote Chapter 15:
['Review Article', 'Review Article', "George Eliot's Vagueness", 'Review Article', 'Review Article']
print('Ids of articles that quote Chapter 15:')
chap15sVS_ids = [item.id for item in chap15sVS]
chap15sVS_ids
Ids of articles that quote Chapter 15:
['http://www.jstor.org/stable/3828662', 'http://www.jstor.org/stable/3828663', 'http://www.jstor.org/stable/10.2979/victorianstudies.56.4.625', 'http://www.jstor.org/stable/3825213', 'http://www.jstor.org/stable/4618985']
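The hard-coded offset tests above (290371 to 322052 for Chapter XV, and 977680 to 998037 for Chapter XLVI below) can be generalized. The helper below is only a sketch: it assumes that chapterLocations, used as bins elsewhere in this notebook, is an ascending list of chapter start offsets into mm.
import bisect
# Illustrative helper (assumes chapterLocations is an ascending list of chapter start offsets in mm)
def chapter_of(offset, chapter_starts=chapterLocations):
    return bisect.bisect_right(chapter_starts, offset) - 1
# e.g. chapter_of(300000) should fall in the Chapter XV bin under that assumption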
# Inspect the text of Middlemarch at a given range of character offsets (this range falls in Chapter LXXX)
mm[1680441:1694311]
#[977680:998037]  # the Chapter XLVI range used below
'CHAPTER LXXX.\n\n "Stern lawgiver! yet thou dost wear\n The Godhead\'s most benignant grace;\n Nor know we anything so fair\n As is the smile upon thy face;\n Flowers laugh before thee on their beds,\n And fragrance in thy footing treads;\n Thou dost preserve the Stars from wrong;\n And the most ancient Heavens, through thee, are fresh and strong.\n --WORDSWORTH: Ode to Duty.\n\n\nWhen Dorothea had seen Mr. Farebrother in the morning, she had promised\nto go and dine at the parsonage on her return from Freshitt. There was\na frequent interchange of visits between her and the Farebrother\nfamily, which enabled her to say that she was not at all lonely at the\nManor, and to resist for the present the severe prescription of a lady\ncompanion. When she reached home and remembered her engagement, she\nwas glad of it; and finding that she had still an hour before she could\ndress for dinner, she walked straight to the schoolhouse and entered\ninto a conversation with the master and mistress about the new bell,\ngiving eager attention to their small details and repetitions, and\ngetting up a dramatic sense that her life was very busy. She paused on\nher way back to talk to old Master Bunney who was putting in some\ngarden-seeds, and discoursed wisely with that rural sage about the\ncrops that would make the most return on a perch of ground, and the\nresult of sixty years\' experience as to soils--namely, that if your\nsoil was pretty mellow it would do, but if there came wet, wet, wet to\nmake it all of a mummy, why then--\n\nFinding that the social spirit had beguiled her into being rather late,\nshe dressed hastily and went over to the parsonage rather earlier than\nwas necessary. That house was never dull, Mr. Farebrother, like\nanother White of Selborne, having continually something new to tell of\nhis inarticulate guests and proteges, whom he was teaching the boys not\nto torment; and he had just set up a pair of beautiful goats to be pets\nof the village in general, and to walk at large as sacred animals. The\nevening went by cheerfully till after tea, Dorothea talking more than\nusual and dilating with Mr. Farebrother on the possible histories of\ncreatures that converse compendiously with their antennae, and for\naught we know may hold reformed parliaments; when suddenly some\ninarticulate little sounds were heard which called everybody\'s\nattention.\n\n"Henrietta Noble," said Mrs. Farebrother, seeing her small sister\nmoving about the furniture-legs distressfully, "what is the matter?"\n\n"I have lost my tortoise-shell lozenge-box. I fear the kitten has\nrolled it away," said the tiny old lady, involuntarily continuing her\nbeaver-like notes.\n\n"Is it a great treasure, aunt?" said Mr. Farebrother, putting up his\nglasses and looking at the carpet.\n\n"Mr. Ladislaw gave it me," said Miss Noble. "A German box--very\npretty, but if it falls it always spins away as far as it can."\n\n"Oh, if it is Ladislaw\'s present," said Mr. Farebrother, in a deep tone\nof comprehension, getting up and hunting. The box was found at last\nunder a chiffonier, and Miss Noble grasped it with delight, saying, "it\nwas under a fender the last time."\n\n"That is an affair of the heart with my aunt," said Mr. Farebrother,\nsmiling at Dorothea, as he reseated himself.\n\n"If Henrietta Noble forms an attachment to any one, Mrs. Casaubon,"\nsaid his mother, emphatically,--"she is like a dog--she would take\ntheir shoes for a pillow and sleep the better."\n\n"Mr. 
Ladislaw\'s shoes, I would," said Henrietta Noble.\n\nDorothea made an attempt at smiling in return. She was surprised and\nannoyed to find that her heart was palpitating violently, and that it\nwas quite useless to try after a recovery of her former animation.\nAlarmed at herself--fearing some further betrayal of a change so marked\nin its occasion, she rose and said in a low voice with undisguised\nanxiety, "I must go; I have overtired myself."\n\nMr. Farebrother, quick in perception, rose and said, "It is true; you\nmust have half-exhausted yourself in talking about Lydgate. That sort\nof work tells upon one after the excitement is over."\n\nHe gave her his arm back to the Manor, but Dorothea did not attempt to\nspeak, even when he said good-night.\n\nThe limit of resistance was reached, and she had sunk back helpless\nwithin the clutch of inescapable anguish. Dismissing Tantripp with a\nfew faint words, she locked her door, and turning away from it towards\nthe vacant room she pressed her hands hard on the top of her head, and\nmoaned out--\n\n"Oh, I did love him!"\n\nThen came the hour in which the waves of suffering shook her too\nthoroughly to leave any power of thought. She could only cry in loud\nwhispers, between her sobs, after her lost belief which she had planted\nand kept alive from a very little seed since the days in Rome--after\nher lost joy of clinging with silent love and faith to one who,\nmisprized by others, was worthy in her thought--after her lost woman\'s\npride of reigning in his memory--after her sweet dim perspective of\nhope, that along some pathway they should meet with unchanged\nrecognition and take up the backward years as a yesterday.\n\nIn that hour she repeated what the merciful eyes of solitude have\nlooked on for ages in the spiritual struggles of man--she besought\nhardness and coldness and aching weariness to bring her relief from the\nmysterious incorporeal might of her anguish: she lay on the bare floor\nand let the night grow cold around her; while her grand woman\'s frame\nwas shaken by sobs as if she had been a despairing child.\n\nThere were two images--two living forms that tore her heart in two, as\nif it had been the heart of a mother who seems to see her child divided\nby the sword, and presses one bleeding half to her breast while her\ngaze goes forth in agony towards the half which is carried away by the\nlying woman that has never known the mother\'s pang.\n\nHere, with the nearness of an answering smile, here within the\nvibrating bond of mutual speech, was the bright creature whom she had\ntrusted--who had come to her like the spirit of morning visiting the\ndim vault where she sat as the bride of a worn-out life; and now, with\na full consciousness which had never awakened before, she stretched out\nher arms towards him and cried with bitter cries that their nearness\nwas a parting vision: she discovered her passion to herself in the\nunshrinking utterance of despair.\n\nAnd there, aloof, yet persistently with her, moving wherever she moved,\nwas the Will Ladislaw who was a changed belief exhausted of hope, a\ndetected illusion--no, a living man towards whom there could not yet\nstruggle any wail of regretful pity, from the midst of scorn and\nindignation and jealous offended pride. The fire of Dorothea\'s anger\nwas not easily spent, and it flamed out in fitful returns of spurning\nreproach. Why had he come obtruding his life into hers, hers that\nmight have been whole enough without him? 
Why had he brought his cheap\nregard and his lip-born words to her who had nothing paltry to give in\nexchange? He knew that he was deluding her--wished, in the very moment\nof farewell, to make her believe that he gave her the whole price of\nher heart, and knew that he had spent it half before. Why had he not\nstayed among the crowd of whom she asked nothing--but only prayed that\nthey might be less contemptible?\n\nBut she lost energy at last even for her loud-whispered cries and\nmoans: she subsided into helpless sobs, and on the cold floor she\nsobbed herself to sleep.\n\nIn the chill hours of the morning twilight, when all was dim around\nher, she awoke--not with any amazed wondering where she was or what had\nhappened, but with the clearest consciousness that she was looking into\nthe eyes of sorrow. She rose, and wrapped warm things around her, and\nseated herself in a great chair where she had often watched before.\nShe was vigorous enough to have borne that hard night without feeling\nill in body, beyond some aching and fatigue; but she had waked to a new\ncondition: she felt as if her soul had been liberated from its terrible\nconflict; she was no longer wrestling with her grief, but could sit\ndown with it as a lasting companion and make it a sharer in her\nthoughts. For now the thoughts came thickly. It was not in Dorothea\'s\nnature, for longer than the duration of a paroxysm, to sit in the\nnarrow cell of her calamity, in the besotted misery of a consciousness\nthat only sees another\'s lot as an accident of its own.\n\nShe began now to live through that yesterday morning deliberately\nagain, forcing herself to dwell on every detail and its possible\nmeaning. Was she alone in that scene? Was it her event only? She\nforced herself to think of it as bound up with another woman\'s life--a\nwoman towards whom she had set out with a longing to carry some\nclearness and comfort into her beclouded youth. In her first outleap\nof jealous indignation and disgust, when quitting the hateful room, she\nhad flung away all the mercy with which she had undertaken that visit.\nShe had enveloped both Will and Rosamond in her burning scorn, and it\nseemed to her as if Rosamond were burned out of her sight forever. But\nthat base prompting which makes a women more cruel to a rival than to a\nfaithless lover, could have no strength of recurrence in Dorothea when\nthe dominant spirit of justice within her had once overcome the tumult\nand had once shown her the truer measure of things. All the active\nthought with which she had before been representing to herself the\ntrials of Lydgate\'s lot, and this young marriage union which, like her\nown, seemed to have its hidden as well as evident troubles--all this\nvivid sympathetic experience returned to her now as a power: it\nasserted itself as acquired knowledge asserts itself and will not let\nus see as we saw in the day of our ignorance. She said to her own\nirremediable grief, that it should make her more helpful, instead of\ndriving her back from effort.\n\nAnd what sort of crisis might not this be in three lives whose contact\nwith hers laid an obligation on her as if they had been suppliants\nbearing the sacred branch? The objects of her rescue were not to be\nsought out by her fancy: they were chosen for her. She yearned towards\nthe perfect Right, that it might make a throne within her, and rule her\nerrant will. 
"What should I do--how should I act now, this very day,\nif I could clutch my own pain, and compel it to silence, and think of\nthose three?"\n\nIt had taken long for her to come to that question, and there was light\npiercing into the room. She opened her curtains, and looked out\ntowards the bit of road that lay in view, with fields beyond outside\nthe entrance-gates. On the road there was a man with a bundle on his\nback and a woman carrying her baby; in the field she could see figures\nmoving--perhaps the shepherd with his dog. Far off in the bending sky\nwas the pearly light; and she felt the largeness of the world and the\nmanifold wakings of men to labor and endurance. She was a part of that\ninvoluntary, palpitating life, and could neither look out on it from\nher luxurious shelter as a mere spectator, nor hide her eyes in selfish\ncomplaining.\n\nWhat she would resolve to do that day did not yet seem quite clear, but\nsomething that she could achieve stirred her as with an approaching\nmurmur which would soon gather distinctness. She took off the clothes\nwhich seemed to have some of the weariness of a hard watching in them,\nand began to make her toilet. Presently she rang for Tantripp, who\ncame in her dressing-gown.\n\n"Why, madam, you\'ve never been in bed this blessed night," burst out\nTantripp, looking first at the bed and then at Dorothea\'s face, which\nin spite of bathing had the pale cheeks and pink eyelids of a mater\ndolorosa. "You\'ll kill yourself, you _will_. Anybody might think now\nyou had a right to give yourself a little comfort."\n\n"Don\'t be alarmed, Tantripp," said Dorothea, smiling. "I have slept; I\nam not ill. I shall be glad of a cup of coffee as soon as possible.\nAnd I want you to bring me my new dress; and most likely I shall want\nmy new bonnet to-day."\n\n"They\'ve lain there a month and more ready for you, madam, and most\nthankful I shall be to see you with a couple o\' pounds\' worth less of\ncrape," said Tantripp, stooping to light the fire. "There\'s a reason\nin mourning, as I\'ve always said; and three folds at the bottom of your\nskirt and a plain quilling in your bonnet--and if ever anybody looked\nlike an angel, it\'s you in a net quilling--is what\'s consistent for a\nsecond year. At least, that\'s _my_ thinking," ended Tantripp, looking\nanxiously at the fire; "and if anybody was to marry me flattering\nhimself I should wear those hijeous weepers two years for him, he\'d be\ndeceived by his own vanity, that\'s all."\n\n"The fire will do, my good Tan," said Dorothea, speaking as she used to\ndo in the old Lausanne days, only with a very low voice; "get me the\ncoffee."\n\nShe folded herself in the large chair, and leaned her head against it\nin fatigued quiescence, while Tantripp went away wondering at this\nstrange contrariness in her young mistress--that just the morning when\nshe had more of a widow\'s face than ever, she should have asked for her\nlighter mourning which she had waived before. Tantripp would never\nhave found the clew to this mystery. Dorothea wished to acknowledge\nthat she had not the less an active life before her because she had\nburied a private joy; and the tradition that fresh garments belonged to\nall initiation, haunting her mind, made her grasp after even that\nslight outward help towards calm resolve. 
For the resolve was not easy.\n\nNevertheless at eleven o\'clock she was walking towards Middlemarch,\nhaving made up her mind that she would make as quietly and unnoticeably\nas possible her second attempt to see and save Rosamond.\n\n\n\n'
# Try to find the source of the Chapter 46 quotations in VS.
chap46sVS = []
idsVS = []
for i, row in vsJournals.iterrows():
    locations = row['Locations in A']
    starts = [item[0] for item in locations]
    if row['Decade'] in [1960, 1970, 1980, 1990, 2000, 2010, 2020]:  # all decades in the corpus
        for start in starts:
            if start > 977680 and start < 998037:  # Does it cite Chapter XLVI?
                if row.id not in idsVS:  # avoid adding the same article twice
                    chap46sVS.append(row)
                    idsVS.append(row.id)
print('Titles of articles that quote Chapter 46:')
[item.title for item in chap46sVS]
Titles of articles that quote Chapter 46:
['Self-Defeating Politics in George Eliot\'s "Felix Holt"', 'Self-Defeating Politics in George Eliot\'s "Felix Holt"', 'Self-Defeating Politics in George Eliot\'s "Felix Holt"', 'Self-Defeating Politics in George Eliot\'s "Felix Holt"', 'Victorian Bibliography for 1993', 'Victorian Bibliography for 1974']
# Pair each Chapter 15-quoting article's id with its row
locs_in_A_df = pd.DataFrame(list(zip(chap15sVS_ids, chap15sVS)), columns=['id', 'Chap'])
df_filtered = df[df['id'].isin(chap15sVS_ids)]
df_filtered
creator | datePublished | docSubType | docType | id | identifier | isPartOf | issueNumber | language | outputFormat | pageCount | pageEnd | pageStart | pagination | provider | publicationYear | publisher | sourceCategory | tdmCategory | title | url | volumeNumber | wordCount | numMatches | Locations in A | Locations in B | abstract | keyphrase | subTitle | year | Decade | Quoted Words | Locations in A with Wordcounts | Wordcounts | journal | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1433 | [Julie English Early] | 1993-07-01 | book-review | article | http://www.jstor.org/stable/3828662 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 4 | [eng] | [unigram, bigram, trigram] | 3.0 | 495 | 493 | pp. 493-495 | jstor | 1993 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature] | Review Article | http://www.jstor.org/stable/3828662 | 36 | 2433 | 1 | [[302998, 303063]] | [[13798, 13863]] | None | None | None | 1993 | 1990 | 10 | [([302998, 303063], 10)] | [10] | Victorian Studies |
1645 | [Peter Allan Dale] | 1993-07-01 | book-review | article | http://www.jstor.org/stable/3828663 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 4 | [eng] | [unigram, bigram, trigram] | 2.0 | 496 | 495 | pp. 495-496 | jstor | 1993 | Indiana University Press | [Language & Literature, History, British Studi... | [Philosophy - Epistemology, Philosophy - Appli... | Review Article | http://www.jstor.org/stable/3828663 | 36 | 1653 | 1 | [[302998, 303063]] | [[3630, 3695]] | None | None | None | 1993 | 1990 | 10 | [([302998, 303063], 10)] | [10] | Victorian Studies |
3419 | [Daniel Wright] | 2014-07-01 | research-article | article | http://www.jstor.org/stable/10.2979/victorians... | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 4 | [eng] | [unigram, bigram, trigram] | 24.0 | 648 | 625 | pp. 625-648 | jstor | 2014 | Indiana University Press | [Language & Literature, History, British Studi... | [Philosophy - Metaphysics] | George Eliot's Vagueness | http://www.jstor.org/stable/10.2979/victorians... | 56 | 10581 | 13 | [[25917, 26218], [25962, 26223], [77391, 77419... | [[17052, 17375], [17122, 17380], [18281, 18309... | Abstract This essay examines George Eliot's pr... | None | None | 2014 | 2010 | 453 | [([25917, 26218], 64), ([25962, 26223], 57), (... | [64, 57, 5, 17, 45, 98, 8, 46, 10, 22, 13, 17,... | Victorian Studies |
4597 | [David R. Carroll] | 1964-09-01 | book-review | article | http://www.jstor.org/stable/3825213 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 1 | [eng] | [unigram, bigram, trigram] | 3.0 | 79 | 77 | pp. 77-79 | jstor | 1964 | Indiana University Press | [Language & Literature, History, British Studi... | [Philosophy - Metaphysics] | Review Article | http://www.jstor.org/stable/3825213 | 8 | 2287 | 1 | [[309151, 309210]] | [[8114, 8173]] | None | None | None | 1964 | 1960 | 8 | [([309151, 309210], 8)] | [8] | Victorian Studies |
5014 | [Jill L. Matus] | 2006-10-01 | book-review | article | http://www.jstor.org/stable/4618985 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 1 | [eng] | [unigram, bigram, trigram] | 3.0 | 169 | 167 | pp. 167-169 | jstor | 2006 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature] | Review Article | http://www.jstor.org/stable/4618985 | 49 | 1049 | 1 | [[291891, 291940]] | [[263, 320]] | None | None | None | 2006 | 2000 | 7 | [([291891, 291940], 7)] | [7] | Victorian Studies |
#df_filtered.to_csv('../../../Middlematch/victorian_studies_chap_15.csv', encoding='utf-8')
For the full list of journals classified as "Victorianist," see Statistics on Victorianist journals in the dataset.
Downloadable link: CSV with the complete list of Victorianist-classified journals.
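The cell below assumes a Python list, list_of_VS_journals, of Victorianist journal titles defined earlier in the notebook. If reconstructing it from the downloadable CSV instead, it could be loaded roughly as follows; the filename and column name here are placeholders rather than the repository's actual ones.
# Hypothetical loading step (filename and column name are placeholders):
# list_of_VS_journals = pd.read_csv('victorianist_journals.csv')['journal'].tolist()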
victorian_studies_df = df[df['journal'].isin(list_of_VS_journals)]
pd.set_option('display.max_rows', 300)
victorian_studies_df.head(5)
creator | datePublished | docSubType | docType | id | identifier | isPartOf | issueNumber | language | outputFormat | pageCount | pageEnd | pageStart | pagination | provider | publicationYear | publisher | sourceCategory | tdmCategory | title | url | volumeNumber | wordCount | numMatches | Locations in A | Locations in B | abstract | keyphrase | subTitle | year | Decade | Quoted Words | Locations in A with Wordcounts | Wordcounts | journal | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5 | None | 2013-01-01 | misc | article | http://www.jstor.org/stable/44371392 | [{'name': 'issn', 'value': '00849812'}, {'name... | Dickens Studies Annual | None | [eng] | [unigram, bigram, trigram] | 7.0 | 497 | 491 | pp. 491-497 | jstor | 2013 | Penn State University Press | [Language & Literature, Humanities] | [Arts - Performing arts, Arts - Literature] | Index | http://www.jstor.org/stable/44371392 | 44 | 2235 | 0 | [] | [] | None | None | None | 2013 | 2010 | 0 | [] | [] | Dickens Studies Annual |
6 | [Mary H Kuhlman] | 1996-01-01 | research-article | article | http://www.jstor.org/stable/45185621 | [{'name': 'issn', 'value': '09517200'}, {'name... | The Gaskell Society Journal | None | [eng] | [unigram, bigram, trigram] | 13.0 | 26 | 14 | pp. 14-26 | jstor | 1996 | Gaskell Society | [Language & Literature, Humanities] | [Arts - Literature] | EDUCATION THROUGH EXPERIENCE IN "NORTH AND SOUTH" | http://www.jstor.org/stable/45185621 | 10 | 5672 | 0 | [] | [] | None | None | None | 1996 | 1990 | 0 | [] | [] | The Gaskell Society Journal |
10 | [Cheryl Cassidy] | 1992-12-01 | misc | article | http://www.jstor.org/stable/20082630 | [{'name': 'issn', 'value': '07094698'}, {'name... | Victorian Periodicals Review | 4 | [eng] | [unigram, bigram, trigram] | 23.0 | 205 | 183 | pp. 183-205 | jstor | 1992 | Research Society for Victorian Periodicals | [Language & Literature, British Studies, Area ... | [Arts - Literature] | Victorian Periodicals 1991: An Annotated Bibli... | http://www.jstor.org/stable/20082630 | 25 | 15737 | 0 | [] | [] | None | None | None | 1992 | 1990 | 0 | [] | [] | Victorian Periodicals Review |
14 | [Catherine Gallagher] | 2006-10-01 | book-review | article | http://www.jstor.org/stable/4618956 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 1 | [eng] | [unigram, bigram, trigram] | 3.0 | 111 | 109 | pp. 109-111 | jstor | 2006 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature] | Review Article | http://www.jstor.org/stable/4618956 | 49 | 1378 | 0 | [] | [] | None | None | None | 2006 | 2000 | 0 | [] | [] | Victorian Studies |
16 | [Angelique Richardson] | 2006-07-01 | book-review | article | http://www.jstor.org/stable/4618943 | [{'name': 'issn', 'value': '00425222'}, {'name... | Victorian Studies | 4 | [eng] | [unigram, bigram, trigram] | 4.0 | 769 | 766 | pp. 766-769 | jstor | 2006 | Indiana University Press | [Language & Literature, History, British Studi... | [Arts - Literature] | Review Article | http://www.jstor.org/stable/4618943 | 48 | 1482 | 0 | [] | [] | None | None | None | 2006 | 2000 | 0 | [] | [] | Victorian Studies |
print("Number of articles in Victorianist journals:")
len(victorian_studies_df)
Number of articles in Victorianist journals:
1632
plotSynchronicAnalysis(synchronicAnalysis(victorian_studies_df, useWordcounts=False), useWordcounts=False)
# Weighted by wordcount (the number of words in the quotation) and normalized by decade (counts are scaled to the maximum value per decade)
VictorianStudiesbooksDiaDF = diachronicAnalysis(victorian_studies_df, decades=(1960, 2020), bins=bookLocations, useWordcounts=True, normalize=True).sort_index()
print('Victorianist Quotations per book, per decade (weighted by length of quotation and normalized by decade):')
VictorianStudiesbooksDiaDF
Victorianist Quotations per book, per decade (weighted by length of quotation and normalized by decade):
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | |
---|---|---|---|---|---|---|---|---|---|
1960 | 0.0 | 0.977456 | 0.861514 | 0.201288 | 0.800322 | 1.000000 | 0.154589 | 0.457327 | 0.487923 |
1970 | 0.0 | 1.000000 | 0.594525 | 0.186983 | 0.226240 | 0.231921 | 0.186467 | 0.163223 | 0.213843 |
1980 | 0.0 | 0.423432 | 0.899908 | 0.212638 | 0.004151 | 0.096402 | 0.105627 | 0.068266 | 1.000000 |
1990 | 0.0 | 1.000000 | 0.717102 | 0.208744 | 0.418345 | 0.150450 | 0.131162 | 0.096442 | 0.333048 |
2000 | 0.0 | 0.705305 | 1.000000 | 0.330361 | 0.357111 | 0.190370 | 0.362015 | 0.094516 | 0.349532 |
2010 | 0.0 | 0.698289 | 1.000000 | 0.442054 | 0.507090 | 0.244010 | 0.345232 | 0.225428 | 0.900244 |
VictorianStudiesbooksDiaDF['decade'] = VictorianStudiesbooksDiaDF.index
VictorianStudiesbooksMelted = VictorianStudiesbooksDiaDF.melt(id_vars='decade', var_name='book')
# Cut out the erroneous "book 0" bin (i.e., title-page material)
VictorianStudiesbooksMelted = VictorianStudiesbooksMelted[VictorianStudiesbooksMelted.book != 0]
Because our corpus contains varying numbers of JSTOR texts per decade, we've decided to also weight by length of quotation and normalize per decade.
# Heatmap of Victorianist quotations per book, per decade (weighted and normalized as above)
alt.Chart(VictorianStudiesbooksMelted, title="Victorianist Middlemarch quotations per book, per decade (weighted by length of quotation and normalized by decade)")\
.mark_rect().encode(x=alt.X('book', title="Book", type='ordinal', axis=alt.Axis(labelAngle=0)),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Amount Quoted")))\
.properties(width=500, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
# Raw quotation counts (not weighted by the number of words in the quotation or normalized by decade)
# Setting useWordcounts=True weights each match by its wordcount; setting it to False counts raw numbers of quotations.
VictorianStudiesbooksNotNormalizedNotWeightedDiaDF = diachronicAnalysis(victorian_studies_df, decades=(1960, 2020), bins=bookLocations,\
useWordcounts=False, normalize=False).sort_index()
print('Number of quotations per book, per decade in Victorianist journals')
VictorianStudiesbooksNotNormalizedNotWeightedDiaDF
Number of quotations per book, per decade in Victorianist journals
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | |
---|---|---|---|---|---|---|---|---|---|
1960 | 0 | 22 | 13 | 6 | 15 | 12 | 4 | 6 | 9 |
1970 | 0 | 58 | 30 | 11 | 12 | 18 | 9 | 3 | 14 |
1980 | 0 | 32 | 49 | 15 | 2 | 7 | 8 | 3 | 30 |
1990 | 0 | 94 | 54 | 24 | 23 | 19 | 11 | 13 | 31 |
2000 | 0 | 56 | 70 | 17 | 22 | 16 | 21 | 6 | 33 |
2010 | 0 | 67 | 66 | 30 | 28 | 17 | 26 | 21 | 69 |
VictorianStudiesbooksNotNormalizedNotWeightedDiaDF['decade'] = VictorianStudiesbooksNotNormalizedNotWeightedDiaDF.index
VictorianStudiesbooksNotNormalizedNotWeightedDiaDFMelted = VictorianStudiesbooksNotNormalizedNotWeightedDiaDF.melt(id_vars='decade', var_name='book')
# Cut out the erroneous "book 0" bin (i.e., title-page material)
VictorianStudiesbooksNotNormalizedNotWeightedDiaDFMelted = VictorianStudiesbooksNotNormalizedNotWeightedDiaDFMelted[VictorianStudiesbooksNotNormalizedNotWeightedDiaDFMelted.book != 0]
alt.Chart(VictorianStudiesbooksNotNormalizedNotWeightedDiaDFMelted, title="Victorianist Middlemarch quotations per book, per decade (not weighted or normalized by decade)")\
.mark_rect().encode(x=alt.X('book', title="Book", type='ordinal', axis=alt.Axis(labelAngle=0)),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Number of Quotations")))\
.properties(width=500, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
# Raw quotation counts (not weighted by the number of words in the quotation or normalized by decade)
# Setting useWordcounts=True weights each match by its wordcount; setting it to False counts raw numbers of quotations.
VictorianStudiesdiaDFquoteOnly = diachronicAnalysis(victorian_studies_df, decades=(1960, 2030), bins=chapterLocations, useWordcounts=False, normalize=False).sort_index()
VictorianStudiesdiaDFquoteOnly.columns.name = 'chapter'
VictorianStudiesdiaDFquoteOnly.index.name = 'decade'
VictorianStudiesdiaDFquoteOnly
chapter | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
decade | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
1960 | 2 | 6 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 5 | 1 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 6 | 4 | 2 | 1 | 1 | 0 | 0 | 2 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 5 | 0 | 1 | 0 | 2 | 2 | 4 | 1 | 0 | 0 | 0 | 2 | 2 | 1 | 0 | 1 | 1 | 2 | 3 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 3 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
1970 | 4 | 7 | 14 | 9 | 5 | 1 | 7 | 4 | 1 | 2 | 1 | 3 | 0 | 0 | 0 | 8 | 0 | 1 | 1 | 7 | 8 | 2 | 3 | 0 | 0 | 0 | 1 | 4 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 2 | 4 | 8 | 0 | 1 | 5 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | 5 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 0 | 1 | 0 | 0 | 0 | 7 |
1980 | 7 | 9 | 2 | 3 | 0 | 2 | 1 | 2 | 0 | 0 | 2 | 4 | 0 | 0 | 1 | 10 | 6 | 0 | 3 | 11 | 13 | 4 | 1 | 8 | 1 | 0 | 0 | 2 | 1 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 5 | 1 | 0 | 0 | 3 | 2 | 0 | 2 | 0 | 4 | 2 | 8 |
1990 | 5 | 9 | 7 | 8 | 1 | 8 | 10 | 4 | 1 | 4 | 15 | 14 | 9 | 2 | 1 | 24 | 1 | 4 | 2 | 4 | 11 | 2 | 2 | 0 | 2 | 2 | 1 | 11 | 1 | 3 | 1 | 0 | 0 | 3 | 0 | 1 | 9 | 6 | 0 | 3 | 0 | 2 | 2 | 1 | 0 | 10 | 5 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 2 | 1 | 2 | 3 | 0 | 2 | 0 | 4 | 1 | 1 | 0 | 2 | 0 | 0 | 2 | 3 | 0 | 0 | 1 | 0 | 3 | 2 | 0 | 2 | 0 | 2 | 0 | 4 | 3 | 0 | 2 | 12 |
2000 | 6 | 3 | 7 | 7 | 4 | 2 | 4 | 2 | 1 | 12 | 7 | 1 | 0 | 0 | 1 | 11 | 4 | 0 | 2 | 18 | 12 | 3 | 19 | 0 | 2 | 3 | 1 | 2 | 4 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 | 4 | 1 | 0 | 8 | 3 | 1 | 1 | 1 | 0 | 3 | 3 | 2 | 1 | 0 | 1 | 5 | 3 | 1 | 4 | 3 | 0 | 0 | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 | 1 | 0 | 2 | 6 | 1 | 2 | 0 | 0 | 4 | 0 | 5 | 1 | 0 | 0 | 10 |
2010 | 11 | 11 | 7 | 3 | 2 | 3 | 5 | 1 | 2 | 5 | 6 | 5 | 6 | 2 | 0 | 10 | 10 | 3 | 4 | 11 | 14 | 5 | 7 | 4 | 9 | 1 | 0 | 4 | 3 | 4 | 0 | 5 | 0 | 0 | 1 | 2 | 7 | 4 | 0 | 7 | 1 | 2 | 4 | 1 | 0 | 2 | 0 | 0 | 1 | 0 | 1 | 7 | 0 | 5 | 1 | 4 | 3 | 0 | 10 | 0 | 3 | 5 | 0 | 7 | 3 | 0 | 1 | 2 | 0 | 0 | 1 | 7 | 3 | 2 | 1 | 3 | 2 | 3 | 0 | 0 | 8 | 6 | 1 | 6 | 0 | 0 | 1 | 33 |
VictorianStudiesdiaDFquoteOnly['decade'] = VictorianStudiesdiaDFquoteOnly.index
VictorianStudiesdiaDFquoteOnlyMelted = VictorianStudiesdiaDFquoteOnly.melt(id_vars='decade')
# Chart with raw quotation counts
alt.Chart(VictorianStudiesdiaDFquoteOnlyMelted, title="Victorianist Journals Middlemarch quotations per chapter, per decade (not weighted or normalized)")\
.mark_rect().encode(x=alt.X('chapter', title="Chapter", type='ordinal', axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Number of Quotations")))\
.properties(width=1000, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
# Weighted by wordcount (the number of words in the quotation) and normalized by decade (counts are scaled to the maximum value per decade)
VictorianStudiesnormalizeddiaDF = diachronicAnalysis(victorian_studies_df, decades=(1960, 2020), bins=chapterLocations, useWordcounts=True, normalize=True).sort_index()
VictorianStudiesnormalizeddiaDF.columns.name = 'chapter'
VictorianStudiesnormalizeddiaDF.index.name = 'decade'
VictorianStudiesnormalizeddiaDF
chapter | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
decade | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
1960 | 0.189189 | 0.263514 | 0.150901 | 0.060811 | 0.020270 | 0.018018 | 0.000000 | 0.022523 | 0.020270 | 0.000000 | 0.416667 | 0.024775 | 0.180180 | 0.000000 | 0.000000 | 0.018018 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.849099 | 0.308559 | 0.029279 | 0.038288 | 0.031532 | 0.000000 | 0.000000 | 0.074324 | 0.121622 | 0.000000 | 0.015766 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.256757 | 0.000000 | 0.027027 | 0.00000 | 0.074324 | 0.096847 | 0.367117 | 0.297297 | 0.000000 | 0.000000 | 0.000000 | 0.049550 | 0.103604 | 0.063063 | 0.000000 | 0.036036 | 0.018018 | 0.128378 | 1.000000 | 0.006757 | 0.000000 | 0.112613 | 0.000000 | 0.038288 | 0.000 | 0.000000 | 0.058559 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.639640 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.213964 | 0.000000 | 0.000000 | 0.000000 | 0.425676 | 0.000000 | 0.009009 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.033784 |
1970 | 0.055777 | 0.362550 | 1.000000 | 0.330677 | 0.138114 | 0.005312 | 0.151394 | 0.122178 | 0.138114 | 0.120850 | 0.015936 | 0.130146 | 0.000000 | 0.000000 | 0.000000 | 0.212483 | 0.000000 | 0.009296 | 0.007968 | 0.266932 | 0.798141 | 0.033201 | 0.200531 | 0.000000 | 0.000000 | 0.000000 | 0.005312 | 0.203187 | 0.272244 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.175299 | 0.00000 | 0.000000 | 0.000000 | 0.156707 | 0.249668 | 0.365206 | 0.000000 | 0.034529 | 0.134130 | 0.025232 | 0.022576 | 0.000000 | 0.006640 | 0.007968 | 0.000000 | 0.000000 | 0.066401 | 0.108898 | 0.199203 | 0.000000 | 0.098274 | 0.000 | 0.006640 | 0.000000 | 0.000000 | 0.000000 | 0.383798 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.015936 | 0.000000 | 0.019920 | 0.000000 | 0.045153 | 0.167331 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.126162 | 0.022576 | 0.000000 | 0.041169 | 0.000000 | 0.0 | 0.000000 | 0.147410 |
1980 | 0.089562 | 0.202020 | 0.010774 | 0.018855 | 0.000000 | 0.025589 | 0.077441 | 0.057239 | 0.000000 | 0.000000 | 0.028283 | 0.108418 | 0.000000 | 0.000000 | 0.006734 | 0.249158 | 0.193939 | 0.000000 | 0.065993 | 0.274074 | 0.408081 | 0.112458 | 0.003367 | 0.221549 | 0.004040 | 0.000000 | 0.000000 | 0.035690 | 0.005387 | 0.017508 | 0.000000 | 0.000000 | 0.026263 | 0.000000 | 0.000000 | 0.000000 | 0.003367 | 0.002694 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.063300 | 0.000000 | 0.004714 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.072727 | 0.000000 | 0.000000 | 0.041077 | 0.092256 | 0.006734 | 0.000000 | 0.000000 | 0.000 | 0.000000 | 0.000000 | 0.014141 | 0.000000 | 0.022896 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.076768 | 0.028956 | 0.000000 | 0.000000 | 0.000000 | 0.131313 | 0.040404 | 0.000000 | 0.000000 | 0.081481 | 0.051852 | 0.000000 | 0.035690 | 0.000000 | 1.0 | 0.012795 | 0.077441 |
1990 | 0.144444 | 0.386111 | 0.152778 | 0.208333 | 0.009722 | 0.298611 | 0.409722 | 0.150000 | 0.044444 | 0.131944 | 0.458333 | 0.395833 | 0.456944 | 0.031944 | 0.036111 | 1.000000 | 0.055556 | 0.102778 | 0.034722 | 0.263889 | 0.711111 | 0.041667 | 0.038889 | 0.000000 | 0.077778 | 0.036111 | 0.005556 | 0.351389 | 0.006944 | 0.076389 | 0.005556 | 0.000000 | 0.000000 | 0.116667 | 0.000000 | 0.025000 | 0.376389 | 0.359722 | 0.00000 | 0.109722 | 0.000000 | 0.123611 | 0.361111 | 0.023611 | 0.000000 | 0.337500 | 0.041667 | 0.000000 | 0.000000 | 0.000000 | 0.023611 | 0.052778 | 0.008333 | 0.000000 | 0.000000 | 0.009722 | 0.051389 | 0.005556 | 0.048611 | 0.125 | 0.000000 | 0.184722 | 0.000000 | 0.050000 | 0.006944 | 0.008333 | 0.000000 | 0.015278 | 0.000000 | 0.000000 | 0.029167 | 0.202778 | 0.000000 | 0.000000 | 0.041667 | 0.000000 | 0.036111 | 0.156944 | 0.000000 | 0.143056 | 0.000000 | 0.077778 | 0.000000 | 0.179167 | 0.043056 | 0.0 | 0.026389 | 0.375000 |
2000 | 0.245902 | 0.142497 | 0.184111 | 0.131148 | 0.031526 | 0.022699 | 0.095839 | 0.075662 | 0.007566 | 0.486759 | 0.234552 | 0.336696 | 0.000000 | 0.000000 | 0.015132 | 0.360656 | 0.102144 | 0.000000 | 0.012610 | 1.000000 | 0.421185 | 0.108449 | 0.808323 | 0.000000 | 0.026482 | 0.143758 | 0.011349 | 0.012610 | 0.593947 | 0.000000 | 0.146280 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.491803 | 0.03657 | 0.109710 | 0.006305 | 0.000000 | 0.365700 | 0.123581 | 0.005044 | 0.075662 | 0.020177 | 0.000000 | 0.146280 | 0.107188 | 0.036570 | 0.008827 | 0.000000 | 0.015132 | 0.214376 | 0.300126 | 0.069357 | 0.102144 | 0.114754 | 0.000 | 0.000000 | 0.138714 | 0.084489 | 0.083228 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.165195 | 0.010088 | 0.000000 | 0.008827 | 0.021438 | 0.013871 | 0.000000 | 0.114754 | 0.051702 | 0.069357 | 0.116015 | 0.000000 | 0.000000 | 0.094578 | 0.000000 | 0.122320 | 0.012610 | 0.0 | 0.000000 | 0.372005 |
2010 | 0.209424 | 0.228272 | 0.274346 | 0.059686 | 0.023037 | 0.033508 | 0.140314 | 0.036649 | 0.031414 | 0.055497 | 0.160209 | 0.063874 | 0.179058 | 0.040838 | 0.000000 | 0.365445 | 0.352880 | 0.036649 | 0.063874 | 0.344503 | 0.515183 | 0.330890 | 0.091099 | 0.037696 | 0.463874 | 0.004188 | 0.000000 | 0.063874 | 0.059686 | 0.202094 | 0.000000 | 0.115183 | 0.000000 | 0.000000 | 0.052356 | 0.082723 | 0.190576 | 0.080628 | 0.00000 | 0.350785 | 0.039791 | 0.176963 | 0.112042 | 0.004188 | 0.000000 | 0.081675 | 0.000000 | 0.000000 | 0.008377 | 0.000000 | 0.158115 | 0.198953 | 0.000000 | 0.071204 | 0.007330 | 0.116230 | 0.126702 | 0.000000 | 0.225131 | 0.000 | 0.218848 | 0.045026 | 0.000000 | 0.212565 | 0.056545 | 0.000000 | 0.010471 | 0.048168 | 0.000000 | 0.000000 | 0.026178 | 0.128796 | 0.060733 | 0.035602 | 0.010471 | 0.033508 | 0.190576 | 0.047120 | 0.000000 | 0.000000 | 0.200000 | 0.165445 | 0.082723 | 0.097382 | 0.000000 | 0.0 | 0.004188 | 1.000000 |
VictorianStudiesnormalizeddiaDF['decade'] = VictorianStudiesnormalizeddiaDF.index
VictorianStudiesnormalizeddiaMelted = VictorianStudiesnormalizeddiaDF.melt(id_vars='decade')
Because our corpus contains varying numbers of JSTOR texts per decade, we've decided to also weight by length of quotation and normalize per decade.
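The scaling itself happens inside the diachronicAnalysis() function used above; as an illustration of the per-decade normalization, here is a minimal sketch on a small, made-up decade-by-chapter table of quoted word counts (the data and the variable names weighted_counts and normalized are hypothetical, not part of the notebook's pipeline).
import pandas as pd
# Hypothetical toy data: total quoted words per chapter (columns) for each decade (rows)
weighted_counts = pd.DataFrame({0: [150, 40], 20: [900, 600], 87: [30, 720]}, index=[1960, 1970])
weighted_counts.index.name = 'decade'
weighted_counts.columns.name = 'chapter'
# Divide each decade's row by its own maximum, so the most-quoted chapter in each decade is 1.0
normalized = weighted_counts.div(weighted_counts.max(axis=1), axis=0)
normalized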
diachronic_chap_victorianist = alt.Chart(VictorianStudiesnormalizeddiaMelted, title="Victorianist Journals Middlemarch quotations per chapter, per decade (normalized by decade and weighted by word count)")\
.mark_rect().encode(x=alt.X('chapter', title="Chapter", type='ordinal', axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Amount Quoted")))\
.properties(width=1000, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
)
diachronic_chap_victorianist
alt.Chart(VictorianStudiesnormalizeddiaMelted, )\
.mark_rect().encode(x=alt.X('chapter', title="Chapter", type='ordinal', axis=alt.Axis(labelAngle=0, values=list(range(0, 87, 5)))),
y=alt.Y('decade', title="Decade",type='ordinal', sort='descending',
axis=alt.Axis(labelExpr='datum.value + "s"')),
color=alt.Color('value', legend=alt.Legend(title="Amount Quoted")))\
.properties(width=1000, height=300).configure_legend(
titleFontSize=14,
labelFontSize=14
).configure_axis(
titleFontSize=14,
labelFontSize=14
).save('Figure-7.png', ppi=300)
How does the quoted language and the scholarly discourse around quotation compare in Victorianist versus Non-Victorianist journals?
For analysis, see: https://lit-mod-viz.github.io/middlemarch-critical-histories/jstor-journal-MDWs.html
Downloadable notebook: https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/notebooks/jstor-journal-MDWs.ipynb
For a list of Victorianist journals, see: Statistics on Victorianist journals in the dataset.
First, we're going to generate a smaller sample dataset, which we'll then perform bootstrapping on.
We'll stratify our dataset by year and then take a random sample within that year (see the sketch after the sampling cells below).
articlesWithMatches1960_2015 = articlesWithMatches[articlesWithMatches['Decade'] >= 1960]
len(articlesWithMatches1960_2015)
len(articlesWithMatches1960_2015['year'].value_counts())
sampleData = articlesWithMatches1960_2015.sample(n=56, random_state=56)
sampleData['journal'].value_counts(sort=False)
sampleData.to_csv('../data/sample_dataset.csv', encoding='utf-8')
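The cells above draw a simple random sample of 56 articles with a fixed seed. For comparison, a strictly per-year stratified draw, as described in the prose above, might look like the following sketch; this is illustrative only, not the notebook's own sampling code, and it assumes the articlesWithMatches1960_2015 dataframe defined above.
# Illustrative sketch: draw one article at random from each year, with a fixed seed for reproducibility
stratified_sample = (articlesWithMatches1960_2015
                     .groupby('year', group_keys=False)
                     .apply(lambda g: g.sample(n=1, random_state=56)))
stratified_sample['year'].value_counts(sort=False)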
# Function to loop over each row, extract the "Locations in A" matches and their metadata, and write them to a new text file
def extractSampleDataMatches(sampleData):
for i, row in sampleData.iterrows():
title = row['title']
year = row['year']
# Print a break between each article
with open('../data/sample-data-matches.txt', "a") as f:
print("---------------------------------------\n", file=f)
print(title, file=f)
print(year, file=f)
# Iterate over each pair of locations in the "Locations in A" column, printing the location indexes
# followed by the text of the match
for pair in row['Locations in A']:
print(f"Location in A: {pair}", file=f)
print(mm[pair[0]:pair[1]]+"\n", file=f)
extractSampleDataMatches(sampleData)
For evaluation metrics and hand verification of results, see: https://lit-mod-viz.github.io/middlemarch-critical-histories/evaluation-metrics-notebook.html
Downloadable notebook: https://github.com/lit-mod-viz/middlemarch-critical-histories/blob/master/notebooks/evaluation-metrics-notebook.ipynb
Visualizations draw on the following datasets:
For citation information, see: Citing this notebook
See link to Figure 1 context and data for more details. Color version of Figure 1.
auth_chart
See link to Figure 2 context and data for more details. Color version of Figure 2.
title_chart
quotes_per_chap
The ten most frequently quoted chapters, starting from the far left of the graph, are 20, 15, 1, 87 [Finale], 0 [Prelude], 3, 19, 10, 81, and 2. See link to Figure 4 context and data for more details.
ranked_freq_chap
See link to Figure 5 context and data for more details.
diachronic_chap
Note: Chapter 0 and 87 refer to the Prelude and Finale, respectively. Chapters not among the top five are represented in light-gray.
See link to Figure 6 context and data for more details. Color version of Figure 6.
top5_chart
For a complete list of journals classified as "Victorianist", see: Statistics on Victorianist journals in the dataset.
See link to Figure 7 context and data for more details.
diachronic_chap_victorianist
To cite this notebook as a whole, please use the following citation format:
Eckert, Sierra and Milan Terlunen. Analysis of Text Matching Data Generated from JSTOR Middlemarch Dataset. https://lit-mod-viz.github.io/middlemarch-critical-histories/jstor-data-and-analysis.html. 2024.
Each section in the Table of Contents contains a hyperlink that can be copied from the browser, eg: https://lit-mod-viz.github.io/middlemarch-critical-histories/jstor-data-and-analysis.html#Figure-3
When citing our analysis of this dataset and its visualizations, please refer to our article:
Eckert, Sierra and Milan Terlunen. "What We Quote: Disciplinary History and the Textual Atmospheres of Middlemarch." Victorian Studies, vol. 66, no. 4 (Summer 2024).
This site is rendered using nbconvert, based on the Jupyter Notebook Viewer by the Jupyter Development Team and PhosphorJS.
A snapshot of this site has been preserved in the Internet Archive Wayback Machine.
The software used to generate the text-matcher dataset was developed in collaboration with Jonathan Reeve and is described in our article. Our code is available on GitHub.