Fixed some typos

Updated the README
take the large spacy model
2023-03-17 21:29:16 +01:00 · 2023-03-17 21:26:14 +01:00 · 2023-03-17 21:25:44 +01:00 · 2023-03-17 20:06:01 +01:00 · 2023-03-15 16:02:47 +01:00 · 2023-03-15 14:27:07 +01:00
10 changed files with 473 additions and 149 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,11 +1,12 @@
-database.db
+database.db
-plots
+plots
-instance
+instance
-__pycache__
+__pycache__
-hedonodon_clientcred.secret
+hedonodon_clientcred.secret
-hedonodon_usercred.secret
+hedonodon_usercred.secret
-.fleet
+.fleet
-test.py
+test.py
-.idea
+.idea
-cardiffnlp
+cardiffnlp
-venv
+venv
 logs.txt
--- a/CRUDManager.py
+++ b/CRUDManager.py
@ -1,10 +1,21 @@
-from DbSetup import engine, session, databaseUrl
+from DbSetup import connection, engine, session, databaseUrl
 import pandas as pd
-from sqlalchemy import desc, select
+from sqlalchemy import desc, select, sql
 from Tables import Toots
 from pandas.core.api import (
    DataFrame)
 def calculateSentimentCount():
    """Calculates the frequencies of the sentiments.
    Returns
    -------
    DataFrame
        Containing date (YY-MM-DD), sentiment (positive, neutral, negative),
        and sentimentCount.
    """
    query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
                FROM Toots
                GROUP BY DATE(datetime),
@ -12,12 +23,23 @@ def calculateSentimentCount():
                HAVING datetime >= DATE("now","-1 day")
                AND datetime < DATE("now")'''
    return pd.read_sql(
-        query,
+        sql.text(query),
-        databaseUrl,
+        connection,
        parse_dates=["datetime"]
    )
-def calculateSentimentMean(dataframe):
+def calculateSentimentMean(dataframe:DataFrame) -> DataFrame:
    """Calculates the mean of the sentiments.
    Parameters
    -------
        dataframe: DataFrame
    Returns
    -------
        Dataframe
        Containing date (YY-MM-DD), sentimentsMean.
    """
    negativeSentimentSum = dataframe[dataframe['sentiment'] == 'negative']['sentimentCount'].sum() * -1
    positiveSentimentSum = dataframe[dataframe['sentiment'] == 'positive']['sentimentCount'].sum()
    sentimentSum = dataframe['sentimentCount'].sum()
@ -32,17 +54,68 @@ def calculateSentimentMean(dataframe):
        ]
    )
-class CRUDManager():
+def getYesterdaysToots() -> DataFrame:
    """Fetches yesterdays toots from database.
-    def saveToDatabase(self, dataframe, table:str, useIndex=False):
+    Returns
    -------
        pd.Dataframe
        Containing date (YY-MM-DD), language, sentiment, toot.
    """
    query = f'''SELECT datetime as date, language, sentiment, toot
                FROM Toots
                WHERE datetime >= DATE("now","-1 day")
                AND datetime < DATE("now")'''
    return pd.read_sql(
        sql.text(query),
        connection,
        parse_dates=["datetime"]
    )
 class CRUDManager():
    """Class for database operations"""
    def saveToDatabase(self, dataframe:DataFrame, table:str, useIndex=False):
        """Saves dataframe to database.
        Parameters
        -------
            dataframe: DataFrame
                Input dataframe.
            table: str
                Table, where to save the data.
            useIndex: boolean
                Should the index of the dataframe be used as index for
                the database table?
        """
        try:
            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
        except:
            print(f'Could not save data to {table}!')
-    def loadFromDatabase(self, table:str, indexColumn=None):
+    def loadFromDatabase(self, table:str, indexColumn=None) -> DataFrame:
-        return pd.read_sql_table(table, databaseUrl, index_col=indexColumn)
+        """Load a table into a dataframe.
-    def getLastToot(self):
+        Parameters
        -------
            table: str
                Table, where to save the data.
            indexColumn: str | None
                Should the index of the table be used as index for
                the dataframe?
        Returns
        -------
            DataFrame
        """
        return pd.read_sql_table(table, connection, index_col=indexColumn)
    def getLastToot(self) -> str:
        """Query the last toot id from database.
        Results
        -------
            str
            A toot id.
        """
        stmt = select(Toots.tootId).order_by(desc('datetime'))
-        return session.scalars(stmt).first()
+        return session.scalars(stmt).first()
--- a/DbSetup.py
+++ b/DbSetup.py
@ -1,11 +1,18 @@
-from sqlalchemy import create_engine
+"""Script to initialize the database.
-from sqlalchemy.orm import Session
+     Serves database url, engine, connection and session.
-from sqlalchemy.ext.declarative import declarative_base
+"""
-
+
-databaseUrl = 'sqlite:///database.db'
+from sqlalchemy import create_engine
-engine = create_engine(databaseUrl, future=True)
+from sqlalchemy.orm import Session
-session = Session(engine)
+from sqlalchemy.ext.declarative import declarative_base
-Base = declarative_base()
+
-
+databaseUrl = 'sqlite:///database.db'
-def init_db():
+engine = create_engine(databaseUrl, future=True)
-     Base.metadata.create_all(bind=engine)
+connection = engine.connect()
 session = Session(engine)
 Base = declarative_base()
 def init_db():
     """Create all tables registered on Base's metadata, bound to engine."""
     tableRegistry = Base.metadata
     tableRegistry.create_all(bind=engine)
--- a/Main.py
+++ b/Main.py
@ -1,4 +1,20 @@
-from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
+"""
 Hedonodon toot sentiment analyzer.
 This program fetches toots from the fedihum.org Mastodon instance, calculates
 the frequencies of the sentiments (positive, neutral, negative) and the mean
 of these nominal values (even though this is not statistically correct (;-_-)!,
 but not all analyzers return compounds).
 It also calculates the word count of the nouns per sentiment.
 It uses germansentiment for German toots, twitter-roberta-base-sentiment for
 English toots, and vaderSentiment for other languages.
 For the word counts I translate the toots to English with the GoogleTranslator
 first.
 """
 from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean, getYesterdaysToots
 from datetime import datetime, date
 from DbSetup import init_db
 import locale
@ -6,10 +22,12 @@ from MastodonAccountManager import MastodonAccountManager
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 from TootCrawler import TootCrawler
 from SentiTooter import translateToots, createWordFrequenciesPerSentiment
-locale.setlocale(locale.LC_TIME, "en_EN.UTF-8")
+locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
 init_db()
 print('Initialize Mastodon...')
 mastodonAccountManager = MastodonAccountManager()
 mastodonInstance = mastodonAccountManager.instance
 """
@ -19,27 +37,47 @@ mastodonInstance.log_in(
    to_file = 'hedonodon_usercred.secret'
 )
 """
 print('done!')
 print('Fetching recent toots...')
 tootCrawler = TootCrawler(mastodonInstance)
 crudManager = CRUDManager()
 lastTootId = crudManager.getLastToot()
 tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
-exit()
+print('done!')
 print('Save toots to database...')
 if not tootsDataframe.empty:
    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
    print('done!')
 else:
-    print('Nothing changed since last database insert!')
+    print('nothing changed since last database insert!')
 print('Calculate word counts...')
 yesterdaysToots = getYesterdaysToots()
 translatedToots = translateToots(yesterdaysToots)
 wordCountsPerSentiment = createWordFrequenciesPerSentiment(translatedToots)
 print('done!')
 print(wordCountsPerSentiment);
 print('Calculate sentiment counts...')
 sentimentsYesterday = calculateSentimentCount()
-sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
+print('done!')
 print('Calculate sentiment mean...')
 sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
 print('done!')
 print('Save calculations to database...')
 if not tootsDataframe.empty:
    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
    crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
    print('done!')
 else:
-    print('Nothing changed since last database insert!')
+    print('nothing changed since last database insert!')
 print('Create figure...')
 colormap = {
    'negative': '#ff9999',
    'neutral': '#ffcc99',
@ -80,7 +118,7 @@ axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))
 # Line chart.
 lineChart = dataframe4LineChart.plot.line(
    ax=axes[1],
-    title='Mean of all sentiments from max positive (1) to min negative (-1)'
+    title='"Mean" of sentiments, calculated from nominal values, pos(1), neu (0), neg (-1)!'
 )
 axes[1].grid(True)
 axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])
@ -92,7 +130,9 @@ axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
 axes[1].tick_params(which='minor', length=0)
 plotFileUrl = f'./plots/{TodayDate}.png'
 plt.savefig(plotFileUrl)
 print('done!')
 print('Send toot...')
 #media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
-#mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
+#mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.\nWord counts per sentiment:\n{wordCountsPerSentiment}', media_ids=media, language='en')
-
+print('done!')
--- a/MastodonAccountManager.py
+++ b/MastodonAccountManager.py
@ -1,5 +1,7 @@
-from mastodon import Mastodon
+from mastodon import Mastodon
-
+
-class MastodonAccountManager():
+class MastodonAccountManager():
-    def __init__(self):
+    """Initialize the Mastodon account.
-        self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')
+    """
    def __init__(self):
        self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')
--- a/README.md
+++ b/README.md
@ -1,4 +1,19 @@
-# Hedonodon
+# Hedonodon
-I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
+## Prerequisites
-
+Install the dependencies with `python -m pip install -r requirements.txt`.
-More Documentation coming soon!
+Install SpaCys nlp model with `python -m spacy download en_core_web_lg`.
 If the automatic download of the twitter-roberta-base-sentiment model and tokenizer fails, go to the model page on Hugging Face (see the Models section) and download them to the respective folder (cardiffnlp/twitter-roberta-base-sentiment).
 ## Purpose
 Hedonodon fetches toots from fedihum.org, calculates the sentiments, the sentiment mean and the word frequencies of each day, and creates fancy diagrams from the data.
 ## Motivation
 This tool was created to understand how sentiment analysis and NLP methods work, so it may lack proper use of models etc.
 ## Models
 It uses ["germansentiment"](https://huggingface.co/oliverguhr/german-sentiment-bert) for German toots, ["twitter-roberta-base-sentiment"](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment) for
 English toots, and ["vaderSentiment"](https://pypi.org/project/vaderSentiment/) for other languages.
 For the word counts I first translate the toots to English with the GoogleTranslator from [deep_translator](https://pypi.org/project/deep-translator/) and then use spaCy's NLP model ["en_core_web_lg"](https://spacy.io/models/en/) to calculate the word frequencies.
 ## Weaknesses
 Since some modules do not return sentiment compounds, I have to use the nominal sentiment values (positive, neutral, negative) to calculate the mean of the day, which is statistically not okay (;-_-).
--- a/SentiTooter.py
+++ b/SentiTooter.py
@ -1,26 +1,43 @@
 from germansentiment import SentimentModel
 from pandas import DataFrame
 import numpy as np
 from scipy.special import softmax
 from transformers import AutoModelForSequenceClassification
 from transformers import AutoTokenizer
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
-
+from deep_translator import GoogleTranslator
 import spacy
 from collections import Counter
 # Preprocess text (username and link placeholders)
-def preprocess(text):
+def preprocess(text:str) -> str:
    """Removes tags and urls from text.
    Parameters
    ------
        text: str
        The raw toot from Mastodon.
    Returns
    ------
        str
        The cleaned text.
    """
    new_text = []
    for t in text.split(" "):
-        t = '@user' if t.startswith('@') and len(t) > 1 else t
+        t = '' if t.startswith('@') and len(t) > 1 else t
-        t = 'http' if t.startswith('http') else t
+        t = '' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)
 class SentiTooter:
-    """"""
+    """Class to analyze the toots.
    """
    def __init__(self):
        """Initilize the sentiment models and labels.
        """
        self.deModel = SentimentModel()
        self.enModelType = f"cardiffnlp/twitter-roberta-base-sentiment"
        self.enModel, self.enTokenizer = self.initModel()
@ -28,7 +45,20 @@ class SentiTooter:
        self.labels = ['negative', 'neutral', 'positive']
        self.sia = SentimentIntensityAnalyzer()
-    def analyze(self, language, content):
+    def analyze(self, language:str, content:str) -> list[str, str, float]:
        """Analyzes the sentiments of the toots.
        Parameters
        ------
            language: str
            The language tag of the toot.
            content: str
            The toot content.
        Returns
        ------
            list[str, str, float]
            A list with the sentiment, analyzer type, and sentiment score.
        """
        match language:
            case 'de':
                sentimentList, probabilitiesList = self.deModel.predict_sentiment([content], output_probabilities=True)
@ -41,15 +71,15 @@ class SentiTooter:
                output = self.enModel(**encoded_input)
                scores = output[0][0].detach().numpy()
                scores = softmax(scores)
-                print(scores)
+                #print(scores)
                sentimentIndexWithMaxScore = np.argmax(scores)
                sentimentLabel = self.labels[sentimentIndexWithMaxScore]
                sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment', max(scores)]
-                print(sentiment)
+                #print(sentiment)
                return sentiment
            case _:
                compound = self.sia.polarity_scores(content)['compound']
-                print(self.sia.polarity_scores(content), 'vaderSentiment')
+                #print(self.sia.polarity_scores(content), 'vaderSentiment')
                if compound > (1 / 3):
                    return ['positive', 'vaderSentiment']
                elif compound < (-1 / 3):
@ -58,8 +88,14 @@ class SentiTooter:
                    return ['neutral', 'vaderSentiment']
    def initModel(self):
        """Initialize the english models.
        Returns
        ------
            tupel
                The pretrained model and tokenizer.
        """
        # PT
        tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
        tokenizer.save_pretrained(self.enModelType)
@ -67,12 +103,93 @@ class SentiTooter:
        model.save_pretrained(self.enModelType)
        return model, tokenizer
-    # # TF
+def translateToots(yesterdaysToots:DataFrame) -> DataFrame:
-    # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
+    """Translates all toots to english.
    # model.save_pretrained(MODEL)
-    # text = "Good night 😊"
+    Returns
-    # encoded_input = tokenizer(text, return_tensors='tf')
+    ------
-    # output = model(encoded_input)
+        Dataframe
-    # scores = output[0][0].numpy()
+        Containing the english translated toots.
-    # scores = softmax(scores)
+    """
    yesterdaysTootsTranslated = yesterdaysToots
    for index, row in yesterdaysTootsTranslated.iterrows():
        if (row['language'] != 'en'):
            try:
                yesterdaysTootsTranslated.at[index,'toot'] = translateToot(row['language'], row['toot'])
                yesterdaysTootsTranslated.at[index,'language'] = 'en'
            except:
                yesterdaysTootsTranslated.drop(index)
    return yesterdaysTootsTranslated
 def translateToot(language:str, toot:str) -> str:
    """Clean a toot and translate it into English.

    Parameters
    ------
        language: str
        The language tag of the toot, used as the translator's source language.
        toot: str
        The toot content.

    Returns
    ------
        str
        Whatever GoogleTranslator's translate() returns for the cleaned toot.
    """
    # preprocess() blanks out mention and link tokens before translation.
    cleanedToot = preprocess(toot)
    englishTranslator = GoogleTranslator(source=language, target='en')
    return englishTranslator.translate(cleanedToot)
 # Lazily initialised spaCy pipeline shared by all countWords() calls.
 _nlpModel = None

 def _getNlpModel():
    """Return the 'en_core_web_lg' spaCy pipeline, loading it on first use only."""
    global _nlpModel
    if _nlpModel is None:
        _nlpModel = spacy.load('en_core_web_lg')
    return _nlpModel

 def countWords(concatedToots: str, number: int) -> list:
    """Count the most frequent nouns in the concatenated toots.

    Parameters
    ------
        concatedToots: str
        All toots of one sentiment, joined into a single string.
        number: int
        How many of the most frequent nouns to return.

    Returns
    ------
        list
        (noun, frequency) tuples as returned by Counter.most_common(number).
    """
    # The pipeline is cached: this function is called once per sentiment and
    # reloading the model for every call is needlessly slow.
    doc = _getNlpModel()(concatedToots)
    # Keep only noun tokens that are neither stop words nor punctuation.
    nouns = [token.text
            for token in doc
            if (not token.is_stop and
                not token.is_punct and
                token.pos_ == "NOUN")]
    # The `number` most common nouns with their frequencies.
    return Counter(nouns).most_common(number)
 def createWordFrequenciesPerSentiment(translatedToots:DataFrame) -> str:
    """Build a text summary of the top noun counts for each sentiment.

    Parameters
    ------
        translatedToots: DataFrame
        Dataframe with a 'sentiment' and a 'toot' column; the toots are
        expected to be in English already.

    Returns
    ------
        str
        One line per sentiment (positive, neutral, negative) of the form
        "sentiment: word (count), word (count), ...".
    """
    summaryLines = []
    for sentiment in ('positive', 'neutral', 'negative'):
        hasSentiment = translatedToots['sentiment'] == sentiment
        joinedToots = translatedToots[hasSentiment].toot.str.cat(sep=' ')
        # Five most frequent nouns of this sentiment, rendered as "word (count)".
        renderedCounts = [f'{word} ({count})' for word, count in countWords(joinedToots, 5)]
        joinedCounts = ', '.join(renderedCounts)
        summaryLines.append(f'{sentiment}: {joinedCounts}')
    return '\n'.join(summaryLines)
--- a/Tables.py
+++ b/Tables.py
@ -1,32 +1,32 @@
-from DbSetup import Base
+"""This script containing the table definitions for the database."""
-from sqlalchemy import Column, Date, Integer, Float, String
+
-
+from DbSetup import Base
-class Toots(Base):
+from sqlalchemy import Column, Date, Integer, Float, String
-    __tablename__ = 'Toots'
+
-    __table_args__ = {'extend_existing': True}
+class Toots(Base):
-    index = Column(Integer, primary_key=True)
+    __tablename__ = 'Toots'
-    model = Column(String(30))
+    __table_args__ = {'extend_existing': True}
-    datetime = Column(Date)
+    index = Column(Integer, primary_key=True)
-    language = Column(String(3))
+    model = Column(String(30))
-    sentiment = Column(String(8))
+    datetime = Column(Date)
-    tootId = Column(String(255))
+    language = Column(String(3))
-    toot = Column(String(600))
+    sentiment = Column(String(8))
-    userName = Column(String(255))
+    tootId = Column(String(255))
-    userId = Column(String(255))
+    toot = Column(String(600))
-
+    userName = Column(String(255))
-
+    userId = Column(String(255))
-
+
-class SentimentCounts(Base):
+class SentimentCounts(Base):
-    __tablename__ = 'SentimentCounts'
+    __tablename__ = 'SentimentCounts'
-    __table_args__ = {'extend_existing': True}
+    __table_args__ = {'extend_existing': True}
-    index = Column(Integer, primary_key=True)
+    index = Column(Integer, primary_key=True)
-    sentimentCount = Column(Integer)
+    sentimentCount = Column(Integer)
-    date = Column(Date, primary_key=True)
+    date = Column(Date, primary_key=True)
-    sentiment = Column(String(8))
+    sentiment = Column(String(8))
-
+
-class SentimentMeans(Base):
+class SentimentMeans(Base):
-    __tablename__ = 'SentimentMeans'
+    __tablename__ = 'SentimentMeans'
-    __table_args__ = {'extend_existing': True}
+    __table_args__ = {'extend_existing': True}
-    index = Column(Integer, primary_key=True)
+    index = Column(Integer, primary_key=True)
-    date = Column(Date, primary_key=True)
+    date = Column(Date, primary_key=True)
    SentimentsMean = Column(Float)
--- a/TootCrawler.py
+++ b/TootCrawler.py
@ -1,48 +1,111 @@
-from langdetect import detect
+from langdetect import detect
-import pytz
+import pytz
-import pandas as pd
+import pandas as pd
-import re
+from pandas import DataFrame
-from SentiTooter import SentiTooter
+import re
-from pprint import pprint
+from SentiTooter import SentiTooter
-
+from pprint import pprint
-class TootCrawler():
+
-
+class TootCrawler():
-    def __init__(self, mastodonInstance) -> None:
+    """Class to fetch the recent toots from fedihum.org."""
-        self.mastodonInstance = mastodonInstance
+
-        self.compilePattern = re.compile('<.*?>')
+    def __init__(self, mastodonInstance: any) -> None:
-        self.sentiTooter = SentiTooter()
+        """Initialize the Mastodon instance and depending classes.
-        self.localTimezone = pytz.timezone('Europe/Berlin')
+
-
+        Parameters
-    def getLocalTimeline(self, minId=None):
+        ------
-        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
+            mastodonInstance: any
-
+                The initialized Mastodon instance.
-    def cleanhtml(self, raw_html):
+        """
-        cleantext = re.sub(self.compilePattern, '', raw_html)
+        self.mastodonInstance = mastodonInstance
-        cleantext = re.sub(r'http\S+', '', cleantext)
+        self.compilePattern = re.compile('<.*?>')
-        return cleantext
+        self.sentiTooter = SentiTooter()
-
+        self.localTimezone = pytz.timezone('Europe/Berlin')
-    def buildTootsDataframe(self, minId=None):
+
-        toots = []
+    def getLocalTimeline(self, minId=None) -> any:
-        allTimelineResults = []
+        """Receave the local timeline
-        timelinePagination = self.getLocalTimeline(minId)
+
-
+        Parameters
-        while timelinePagination:
+        ------
-            allTimelineResults = allTimelineResults + timelinePagination
+            minId: str | None
-            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
+                The last fetched toot id from the database.
-        for i in allTimelineResults:
+
-            content = self.cleanhtml(i.content)
+        Returns
-            language = detect(content)
+        ------
-            sentiment = self.sentiTooter.analyze(language, content)
+            any
-            toot = {
+                The local Mastodon timeline from fedihum.org.
-                "sentiment": sentiment[0],
+        """
-                "model": sentiment[1],
+        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
-                "toot": content,
+
-                "datetime": i.created_at.astimezone(self.localTimezone),
+    def cleanhtml(self, raw_html:str) -> str:
-                "language": language,
+        """remove brackets and http string from toots
-                "userName": i.account.display_name,
+
-                "userId": i.account.id,
+        Parameters
-                "tootId": i.id
+        ------
-            }
+            raw_html: str
-            toots.append(toot)
+            The toot content.
-        toots.sort(key=lambda item:item.get('datetime'))
+        Returns
        ------
            str:
            The cleaned toot content.
        """
        cleantext = re.sub(self.compilePattern, '', raw_html)
        cleantext = re.sub(r'http\S+', '', cleantext)
        return cleantext
    def buildTootsDataframe(self, minId=None) -> DataFrame:
        """Fetch the local timeline and parse it into a dataframe of analyzed toots.

        Parameters
        ------
            minId: str | None
            The id of the last fetched toot; passed on to getLocalTimeline().

        Returns
        ------
            DataFrame
            One row per toot, sorted ascending by datetime, containing
            sentiment: str
                The sentiment (positive, neutral, negative).
            model: str
                The used sentiment model.
            toot: str
                The cleaned content of the toot.
            datetime: datetime
                The creation time of the toot, converted to self.localTimezone.
            language: str | None
                The detected language flag of the toot, None if detection failed.
            userName: str
                The display name of the toot's account.
            userId: str
                The user id.
            tootId: str
                The toot id.
        """
        # Collect every page of the timeline.
        allTimelineResults = []
        timelinePagination = self.getLocalTimeline(minId)
        while timelinePagination:
            # extend() appends in place instead of copying the whole
            # accumulated list again for every page.
            allTimelineResults.extend(timelinePagination)
            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)

        toots = []
        for status in allTimelineResults:
            content = self.cleanhtml(status.content)
            try:
                language = detect(content)
            except Exception:
                # detect() raised; keep the toot with an unknown language.
                # Unlike a bare except, this lets KeyboardInterrupt and
                # SystemExit propagate.
                language = None
            sentiment = self.sentiTooter.analyze(language, content)
            toots.append({
                "sentiment": sentiment[0],
                "model": sentiment[1],
                "toot": content,
                "datetime": status.created_at.astimezone(self.localTimezone),
                "language": language,
                "userName": status.account.display_name,
                "userId": status.account.id,
                "tootId": status.id
            })
        # Oldest toot first.
        toots.sort(key=lambda item: item.get('datetime'))
        return pd.DataFrame.from_records(toots)
--- a/requirements.txt
+++ b/requirements.txt
@ -3,6 +3,12 @@ matplotlib
 pandas
 sqlalchemy
 vader-multi
 langdetect
 numpy
 pytz
-transformers
+transformers
 wheel
 germansentiment
 scipy
 deep_translator
 spacy
Author	SHA1	Message	Date
rnsrk	03792f2120	Fixed some typos	2023-03-17 21:29:16 +01:00
rnsrk	cafda77e7f	Updated the README	2023-03-17 21:26:14 +01:00
rnsrk	8d9a7fa603	take the large spacy model	2023-03-17 21:25:44 +01:00
rnsrk	bc842244c7	add code documentation	2023-03-17 20:06:01 +01:00
rnsrk	4479bd2429	implement word counts.	2023-03-15 16:02:47 +01:00
rnsrk	6a8caac29e	implement rough wordcount	2023-03-15 14:27:07 +01:00
rnsrk	09fd313a89	Merge branch 'main' into with_cites	2023-03-15 13:25:41 +01:00
rnsrk	3b677e5713	underway to wordcount	2023-03-15 13:21:44 +01:00
Robert Nasarek	8f7c578087	shortend description	2023-03-15 11:16:35 +01:00
Robert Nasarek	79f54079f7	fixed unrecognisable lang bug	2023-01-31 17:51:06 +01:00
Robert Nasarek	2b98565444	made hedonodon server ready	2023-01-27 21:08:25 +01:00