made hedonodon server ready

2023-01-27 21:08:25 +01:00 · 2023-01-27 21:08:25 +01:00 · 2b98565444
commit 2b98565444
parent 52223192b4
10 changed files with 342 additions and 336 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,11 +1,12 @@
-database.db
-plots
-instance
-__pycache__
-hedonodon_clientcred.secret
-hedonodon_usercred.secret
-.fleet
-test.py
-.idea
-cardiffnlp
-venv
+database.db
+plots
+instance
+__pycache__
+hedonodon_clientcred.secret
+hedonodon_usercred.secret
+.fleet
+test.py
+.idea
+cardiffnlp
+venv
+logs.txt
--- a/CRUDManager.py
+++ b/CRUDManager.py
@ -1,48 +1,48 @@
-from DbSetup import engine, session, databaseUrl
-import pandas as pd
-from sqlalchemy import desc, select
-from Tables import Toots
-
-
-def calculateSentimentCount():
-    query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
-                FROM Toots
-                GROUP BY DATE(datetime),
-                sentiment
-                HAVING datetime >= DATE("now","-1 day")
-                AND datetime < DATE("now")'''
-    return pd.read_sql(
-        query,
-        databaseUrl,
-        parse_dates=["datetime"]
-    )
-
-def calculateSentimentMean(dataframe):
-    negativeSentimentSum = dataframe[dataframe['sentiment'] == 'negative']['sentimentCount'].sum() * -1
-    positiveSentimentSum = dataframe[dataframe['sentiment'] == 'positive']['sentimentCount'].sum()
-    sentimentSum = dataframe['sentimentCount'].sum()
-    sentimentMean = (negativeSentimentSum + positiveSentimentSum) / sentimentSum
-    sentimentDate = dataframe.loc[0]['date']
-    return pd.DataFrame.from_records(
-        [
-            {
-                'date': sentimentDate,
-                'sentimentsMean': sentimentMean
-            }
-        ]
-    )
-
-class CRUDManager():
-
-    def saveToDatabase(self, dataframe, table:str, useIndex=False):
-        try:
-            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
-        except:
-            print(f'Could not save data to {table}!')
-
-    def loadFromDatabase(self, table:str, indexColumn=None):
-        return pd.read_sql_table(table, databaseUrl, index_col=indexColumn)
-
-    def getLastToot(self):
-        stmt = select(Toots.tootId).order_by(desc('datetime'))
-        return session.scalars(stmt).first()
+from DbSetup import connection, engine, session, databaseUrl
+import pandas as pd
+from sqlalchemy import desc, select, sql
+from Tables import Toots
+
+
+def calculateSentimentCount():
+    """Count yesterday's toots per sentiment label.
+
+    Runs an aggregate query on the ``Toots`` table through the shared
+    module-level ``connection``.
+
+    Returns:
+        A DataFrame with the columns ``date``, ``sentiment`` and
+        ``sentimentCount``: one row per sentiment label stored between
+        yesterday (inclusive) and today (exclusive), both evaluated by
+        SQLite's ``DATE('now')``.
+    """
+    # The rows are filtered in WHERE, before grouping: ``datetime`` is neither
+    # grouped nor aggregated, so testing it in HAVING depended on whichever
+    # row SQLite happened to pick for each group. The date arguments use
+    # single quotes because double quotes denote identifiers in SQL.
+    query = '''SELECT DATE(datetime) AS date, sentiment, COUNT(sentiment) AS sentimentCount
+               FROM Toots
+               WHERE datetime >= DATE('now', '-1 day')
+               AND datetime < DATE('now')
+               GROUP BY DATE(datetime),
+               sentiment'''
+    # NOTE(review): the result has no ``datetime`` column, so ``parse_dates``
+    # presumably has no effect; it is kept unchanged because Main.py parses
+    # ``date`` as a plain string.
+    return pd.read_sql(
+        sql.text(query),
+        connection,
+        parse_dates=["datetime"]
+    )
+
+def calculateSentimentMean(dataframe):
+    """Collapse per-sentiment counts into a single mean sentiment score.
+
+    Rows labelled ``negative`` contribute their count with a negative sign,
+    rows labelled ``positive`` with a positive sign, every other label
+    contributes 0. The signed sum is divided by the total count.
+
+    Args:
+        dataframe: Frame with the columns ``date``, ``sentiment`` and
+            ``sentimentCount``.
+
+    Returns:
+        A one-row DataFrame with the columns ``date`` (taken from the first
+        input row) and ``sentimentsMean``. When ``dataframe`` has no rows an
+        empty DataFrame with the same two columns is returned.
+    """
+    if dataframe.empty:
+        # Nothing was counted: skip the 0 / 0 division and the first-row
+        # lookup, both of which would otherwise fail.
+        return pd.DataFrame(columns=['date', 'sentimentsMean'])
+
+    counts = dataframe['sentimentCount']
+    negativeSentimentSum = counts[dataframe['sentiment'] == 'negative'].sum() * -1
+    positiveSentimentSum = counts[dataframe['sentiment'] == 'positive'].sum()
+    sentimentMean = (negativeSentimentSum + positiveSentimentSum) / counts.sum()
+    # Positional access, so the lookup does not depend on an index label 0.
+    sentimentDate = dataframe.iloc[0]['date']
+    return pd.DataFrame.from_records(
+        [
+            {
+                'date': sentimentDate,
+                'sentimentsMean': sentimentMean
+            }
+        ]
+    )
+
+class CRUDManager():
+    """Persistence helper built on the shared engine, connection and session from DbSetup."""
+
+    def saveToDatabase(self, dataframe, table:str, useIndex=False):
+        """Append ``dataframe`` to ``table``, reporting failures instead of raising.
+
+        Args:
+            dataframe: Frame whose rows are appended.
+            table: Name of the target table.
+            useIndex: Whether the frame's index is written as a column.
+        """
+        try:
+            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
+        except Exception as error:
+            # Still best effort, but a bare ``except`` also swallowed
+            # KeyboardInterrupt/SystemExit and hid why the write failed.
+            print(f'Could not save data to {table}!')
+            print(f'Reason: {error!r}')
+
+    def loadFromDatabase(self, table:str, indexColumn=None):
+        """Read the whole ``table`` into a DataFrame, optionally indexed by ``indexColumn``."""
+        return pd.read_sql_table(table, connection, index_col=indexColumn)
+
+    def getLastToot(self):
+        """Return the ``tootId`` of the row with the greatest ``datetime``, or None if there is none."""
+        stmt = select(Toots.tootId).order_by(desc('datetime'))
+        return session.scalars(stmt).first()
--- a/DbSetup.py
+++ b/DbSetup.py
@ -1,11 +1,12 @@
-from sqlalchemy import create_engine
-from sqlalchemy.orm import Session
-from sqlalchemy.ext.declarative import declarative_base
-
-databaseUrl = 'sqlite:///database.db'
-engine = create_engine(databaseUrl, future=True)
-session = Session(engine)
-Base = declarative_base()
-
-def init_db():
-     Base.metadata.create_all(bind=engine)
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+from sqlalchemy.ext.declarative import declarative_base
+
+# SQLite database file, addressed relative to the process working directory.
+databaseUrl = 'sqlite:///database.db'
+engine = create_engine(databaseUrl, future=True)
+# Module-level connection that CRUDManager.py hands to pandas for reads.
+# NOTE(review): opened at import time and not closed anywhere in the code shown.
+connection = engine.connect()
+session = Session(engine)
+# Declarative base class; the models in Tables.py derive from it.
+Base = declarative_base()
+
+def init_db():
+    """Issue CREATE statements for the tables registered on ``Base.metadata``."""
+    metadata = Base.metadata
+    metadata.create_all(bind=engine)
--- a/Main.py
+++ b/Main.py
@ -1,98 +1,98 @@
-from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
-from datetime import datetime, date
-from DbSetup import init_db
-import locale
-from MastodonAccountManager import MastodonAccountManager
-import matplotlib.pyplot as plt
-import matplotlib.dates as mdates
-from TootCrawler import TootCrawler
-
-locale.setlocale(locale.LC_TIME, "en_EN.UTF-8")
-init_db()
-
-mastodonAccountManager = MastodonAccountManager()
-mastodonInstance = mastodonAccountManager.instance
-"""
-mastodonInstance.log_in(
-    'USER-EMAIL',
-    'PW',
-    to_file = 'hedonodon_usercred.secret'
-)
-"""
-
-tootCrawler = TootCrawler(mastodonInstance)
-crudManager = CRUDManager()
-
-lastTootId = crudManager.getLastToot()
-tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
-
-if not tootsDataframe.empty:
-    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
-else:
-    print('Nothing changed since last database insert!')
-
-sentimentsYesterday = calculateSentimentCount()
-sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
-
-if not tootsDataframe.empty:
-    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
-    crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
-else:
-    print('Nothing changed since last database insert!')
-
-colormap = {
-    'negative': '#ff9999',
-    'neutral': '#ffcc99',
-    "positive": '#99ff99'
-}
-
-todaysColors = []
-for sentiment in sentimentsYesterday['sentiment'].to_numpy():
-    todaysColors.append(colormap[sentiment])
-
-
-
-TodayDate = datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d').strftime('%d.%m.%Y')
-dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
-dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1)
-
-fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))
-
-# Pie chart.
-pieChartlabels = dataframe4PieChart.index.to_numpy()
-pieChart = dataframe4PieChart.plot.pie(
-    ax=axes[0],
-    y='sentimentCount',
-    ylabel="",
-    labels=dataframe4PieChart['sentimentCount'],
-    title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org',
-    colors=todaysColors,
-    wedgeprops=dict(linewidth=3, edgecolor='w'),
-    startangle=90
-)
-
-axes[0].axis('equal')
-centre_circle = plt.Circle((0, 0), 0.6, fc='white')
-axes[0].add_patch(centre_circle)
-chartBox = axes[0].get_position()
-axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))
-
-# Line chart.
-lineChart = dataframe4LineChart.plot.line(
-    ax=axes[1],
-    title='Mean of all sentiments from max positive (1) to min negative (-1)'
-)
-axes[1].grid(True)
-axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])
-axes[1].set_ylim([-1, 1])
-axes[1].xaxis.set_major_locator(mdates.MonthLocator())
-axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15))
-axes[1].xaxis.set_major_formatter(plt.NullFormatter())
-axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
-axes[1].tick_params(which='minor', length=0)
-plotFileUrl = f'./plots/{TodayDate}.png'
-plt.savefig(plotFileUrl)
-
-media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
-mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
-
+from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
+from datetime import datetime, date
+from DbSetup import init_db
+import locale
+from MastodonAccountManager import MastodonAccountManager
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+from TootCrawler import TootCrawler
+
+# LC_TIME selects the locale for formatted dates; the line chart below
+# formats its month ticks with '%h'.
+locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
+init_db()
+
+mastodonAccountManager = MastodonAccountManager()
+mastodonInstance = mastodonAccountManager.instance
+# The bare string below is never executed. It preserves a log_in call that
+# writes to 'hedonodon_usercred.secret' — presumably a one-time setup step.
+"""
+mastodonInstance.log_in(
+    'USER-EMAIL',
+    'PW',
+    to_file = 'hedonodon_usercred.secret'
+)
+"""
+
+tootCrawler = TootCrawler(mastodonInstance)
+crudManager = CRUDManager()
+
+# Crawl starting from the most recently stored toot; getLastToot() yields
+# None when there is no stored row, and that None is passed on as minId.
+lastTootId = crudManager.getLastToot()
+tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
+
+if not tootsDataframe.empty:
+    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
+else:
+    print('Nothing changed since last database insert!')
+
+# Aggregate the stored toots: counts per sentiment, then one mean score.
+sentimentsYesterday = calculateSentimentCount()
+sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
+
+# The aggregates are only written when this run fetched new toots.
+# NOTE(review): the same message is printed a second time in the else branch.
+if not tootsDataframe.empty:
+    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
+    crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
+else:
+    print('Nothing changed since last database insert!')
+
+# Fixed colour per sentiment label for the pie chart wedges.
+colormap = {
+    'negative': '#ff9999',
+    'neutral': '#ffcc99',
+    "positive": '#99ff99'
+}
+
+# Colours in the same order as the rows of sentimentsYesterday.
+todaysColors = []
+for sentiment in sentimentsYesterday['sentiment'].to_numpy():
+    todaysColors.append(colormap[sentiment])
+
+
+
+# Despite its name, TodayDate is the date of the first aggregated row,
+# re-formatted from YYYY-MM-DD to DD.MM.YYYY.
+# NOTE(review): indexing row 0 fails when the query returned no rows.
+TodayDate = datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d').strftime('%d.%m.%Y')
+dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
+# Full history of stored means, indexed by date; the stored 'index' column is dropped.
+dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1)
+
+# Two stacked plots: pie chart on top, line chart below.
+fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))
+
+# Pie chart.
+pieChartlabels = dataframe4PieChart.index.to_numpy()
+# The wedges are labelled with the counts; the sentiment names go into the legend.
+pieChart = dataframe4PieChart.plot.pie(
+    ax=axes[0],
+    y='sentimentCount',
+    ylabel="",
+    labels=dataframe4PieChart['sentimentCount'],
+    title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org',
+    colors=todaysColors,
+    wedgeprops=dict(linewidth=3, edgecolor='w'),
+    startangle=90
+)
+
+axes[0].axis('equal')
+# A white circle over the centre turns the pie into a ring.
+centre_circle = plt.Circle((0, 0), 0.6, fc='white')
+axes[0].add_patch(centre_circle)
+# NOTE(review): chartBox is not used anywhere in the code shown.
+chartBox = axes[0].get_position()
+axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))
+
+# Line chart.
+lineChart = dataframe4LineChart.plot.line(
+    ax=axes[1],
+    title='Mean of all sentiments from max positive (1) to min negative (-1)'
+)
+axes[1].grid(True)
+# NOTE(review): the x axis is hard-coded to the calendar year 2023.
+axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])
+axes[1].set_ylim([-1, 1])
+# Major ticks at month starts carry no label; the labels sit on minor
+# ticks placed on the 15th of each month.
+axes[1].xaxis.set_major_locator(mdates.MonthLocator())
+axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15))
+axes[1].xaxis.set_major_formatter(plt.NullFormatter())
+axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
+axes[1].tick_params(which='minor', length=0)
+# NOTE(review): assumes the ./plots directory already exists — confirm.
+plotFileUrl = f'./plots/{TodayDate}.png'
+plt.savefig(plotFileUrl)
+
+# Upload the rendered image, then post a status that attaches it.
+media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
+mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
+
--- a/MastodonAccountManager.py
+++ b/MastodonAccountManager.py
@ -1,5 +1,5 @@
-from mastodon import Mastodon
-
-class MastodonAccountManager():
-    def __init__(self):
-        self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')
+from mastodon import Mastodon
+
+class MastodonAccountManager:
+    """Holds the Mastodon client built from the two stored credential files."""
+
+    def __init__(self):
+        credentialFiles = {
+            'client_id': 'hedonodon_clientcred.secret',
+            'access_token': 'hedonodon_usercred.secret',
+        }
+        self.instance = Mastodon(**credentialFiles)
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-# Hedonodon
-I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
-
+# Hedonodon
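+I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compound scores for toots that are neither German nor English; German toots are classified with germansentiment and English toots with cardiffnlp/twitter-roberta-base-sentiment.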
+
 More Documentation coming soon!
--- a/SentiTooter.py
+++ b/SentiTooter.py
@ -1,74 +1,74 @@
-from germansentiment import SentimentModel
-import numpy as np
-from scipy.special import softmax
-from transformers import AutoModelForSequenceClassification
-from transformers import AutoTokenizer
-from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
-
-
-# Preprocess text (username and link placeholders)
-def preprocess(text):
-    new_text = []
-
-    for t in text.split(" "):
-        t = '@user' if t.startswith('@') and len(t) > 1 else t
-        t = 'http' if t.startswith('http') else t
-        new_text.append(t)
-    return " ".join(new_text)
-
-
-class SentiTooter:
-    """"""
-
-    def __init__(self):
-        self.deModel = SentimentModel()
-        self.enModelType = f"cardiffnlp/twitter-roberta-base-sentiment"
-        self.enModel, self.enTokenizer = self.initModel()
-        # https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt
-        self.labels = ['negative', 'neutral', 'positive']
-        self.sia = SentimentIntensityAnalyzer()
-
-    def analyze(self, language, content):
-        match language:
-            case 'de':
-                sentiment = self.deModel.predict_sentiment([content])
-                sentiment.append('germanSentiment')
-                return sentiment
-            case 'en':
-                text = preprocess(content)
-                encoded_input = self.enTokenizer(text, return_tensors='pt')
-                output = self.enModel(**encoded_input)
-                scores = output[0][0].detach().numpy()
-                scores = softmax(scores)
-                sentimentIndexWithMaxScore = np.argmax(scores)
-                sentimentLabel = self.labels[sentimentIndexWithMaxScore]
-                sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment']
-                return sentiment
-            case _:
-                compound = self.sia.polarity_scores(content)['compound']
-                if compound > (1 / 3):
-                    return ['positive', 'vaderSentiment']
-                elif compound < (-1 / 3):
-                    return ['negative', 'vaderSentiment']
-                else:
-                    return ['neutral', 'vaderSentiment']
-
-
-
-    def initModel(self):
-        # PT
-        tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
-        tokenizer.save_pretrained(self.enModelType)
-        model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
-        model.save_pretrained(self.enModelType)
-        return model, tokenizer
-
-    # # TF
-    # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
-    # model.save_pretrained(MODEL)
-
-    # text = "Good night 😊"
-    # encoded_input = tokenizer(text, return_tensors='tf')
-    # output = model(encoded_input)
-    # scores = output[0][0].numpy()
-    # scores = softmax(scores)
+from germansentiment import SentimentModel
+import numpy as np
+from scipy.special import softmax
+from transformers import AutoModelForSequenceClassification
+from transformers import AutoTokenizer
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
+
+def preprocess(text):
+    """Replace user mentions and links in ``text`` with fixed placeholders.
+
+    The text is split on single spaces. A token that starts with ``@`` and
+    is longer than one character becomes ``@user``; a token that starts with
+    ``http`` becomes ``http``; every other token is kept unchanged.
+    """
+    def mask(token):
+        if token.startswith('@') and len(token) > 1:
+            return '@user'
+        if token.startswith('http'):
+            return 'http'
+        return token
+
+    return " ".join(mask(token) for token in text.split(" "))
+
+
+class SentiTooter:
+    """Labels text as negative, neutral or positive with a model chosen by language."""
+
+    def __init__(self):
+        self.deModel = SentimentModel()
+        self.enModelType = "cardiffnlp/twitter-roberta-base-sentiment"
+        self.enModel, self.enTokenizer = self.initModel()
+        # Label order used to decode the English model's output, see
+        # https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt
+        self.labels = ['negative', 'neutral', 'positive']
+        self.sia = SentimentIntensityAnalyzer()
+
+    def analyze(self, language, content):
+        """Return a two-item list ``[label, modelName]`` for ``content``.
+
+        ``'de'`` is handled by the German model, ``'en'`` by the English
+        transformer model; any other language value falls through to the
+        vaderSentiment compound score with thresholds at +1/3 and -1/3.
+        """
+        if language == 'de':
+            germanResult = self.deModel.predict_sentiment([content])
+            germanResult.append('germanSentiment')
+            return germanResult
+
+        if language == 'en':
+            encoded = self.enTokenizer(preprocess(content), return_tensors='pt')
+            rawScores = self.enModel(**encoded)[0][0].detach().numpy()
+            bestIndex = np.argmax(softmax(rawScores))
+            return [self.labels[bestIndex], 'twitter-roberta-base-sentiment']
+
+        compound = self.sia.polarity_scores(content)['compound']
+        if compound > (1 / 3):
+            return ['positive', 'vaderSentiment']
+        if compound < (-1 / 3):
+            return ['negative', 'vaderSentiment']
+        return ['neutral', 'vaderSentiment']
+
+
+
+    def initModel(self):
+        """Load tokenizer and model for ``self.enModelType`` and save both under that same path.
+
+        Returns:
+            The tuple ``(model, tokenizer)``.
+        """
+        # PT
+        enTokenizer = AutoTokenizer.from_pretrained(self.enModelType)
+        enTokenizer.save_pretrained(self.enModelType)
+        enModel = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
+        enModel.save_pretrained(self.enModelType)
+        return enModel, enTokenizer
+
+    # # TF
+    # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
+    # model.save_pretrained(MODEL)
+
+    # text = "Good night 😊"
+    # encoded_input = tokenizer(text, return_tensors='tf')
+    # output = model(encoded_input)
+    # scores = output[0][0].numpy()
+    # scores = softmax(scores)
--- a/Tables.py
+++ b/Tables.py
@ -1,32 +1,32 @@
-from DbSetup import Base
-from sqlalchemy import Column, Date, Integer, Float, String
-
-class Toots(Base):
-    __tablename__ = 'Toots'
-    __table_args__ = {'extend_existing': True}
-    index = Column(Integer, primary_key=True)
-    model = Column(String(30))
-    datetime = Column(Date)
-    language = Column(String(3))
-    sentiment = Column(String(8))
-    tootId = Column(String(255))
-    toot = Column(String(600))
-    userName = Column(String(255))
-    userId = Column(String(255))
-
-
-
-class SentimentCounts(Base):
-    __tablename__ = 'SentimentCounts'
-    __table_args__ = {'extend_existing': True}
-    index = Column(Integer, primary_key=True)
-    sentimentCount = Column(Integer)
-    date = Column(Date, primary_key=True)
-    sentiment = Column(String(8))
-
-class SentimentMeans(Base):
-    __tablename__ = 'SentimentMeans'
-    __table_args__ = {'extend_existing': True}
-    index = Column(Integer, primary_key=True)
-    date = Column(Date, primary_key=True)
+from DbSetup import Base
+from sqlalchemy import Column, Date, Integer, Float, String
+
+# One row per crawled toot; the column names match the record keys that
+# TootCrawler.buildTootsDataframe produces.
+class Toots(Base):
+    __tablename__ = 'Toots'
+    __table_args__ = {'extend_existing': True}
+    index = Column(Integer, primary_key=True)
+    # Name of the model that produced the sentiment label.
+    model = Column(String(30))
+    # NOTE(review): declared as Date although the crawler supplies full
+    # timestamps — confirm that the time of day is meant to be kept.
+    datetime = Column(Date)
+    language = Column(String(3))
+    # 'negative', 'neutral' or 'positive'.
+    sentiment = Column(String(8))
+    tootId = Column(String(255))
+    # Toot text after HTML tags and links have been stripped.
+    toot = Column(String(600))
+    userName = Column(String(255))
+    userId = Column(String(255))
+
+
+
+# Per-date number of toots for each sentiment label; Main.py appends the
+# result of calculateSentimentCount here with its DataFrame index included.
+class SentimentCounts(Base):
+    __tablename__ = 'SentimentCounts'
+    __table_args__ = {'extend_existing': True}
+    # Together with ``date`` this forms a composite primary key.
+    index = Column(Integer, primary_key=True)
+    sentimentCount = Column(Integer)
+    date = Column(Date, primary_key=True)
+    sentiment = Column(String(8))
+
+# Per-date mean sentiment score; Main.py appends the result of
+# calculateSentimentMean here and reads the table back for the line chart.
+class SentimentMeans(Base):
+    __tablename__ = 'SentimentMeans'
+    __table_args__ = {'extend_existing': True}
+    # Together with ``date`` this forms a composite primary key.
+    index = Column(Integer, primary_key=True)
+    date = Column(Date, primary_key=True)
+    # NOTE(review): the DataFrame written to this table names the column
+    # 'sentimentsMean' (lower-case s) — confirm the two spellings are meant to match.
    SentimentsMean = Column(Float)
--- a/TootCrawler.py
+++ b/TootCrawler.py
@ -1,48 +1,48 @@
-from langdetect import detect
-import pytz
-import pandas as pd
-import re
-from SentiTooter import SentiTooter
-from pprint import pprint
-
-class TootCrawler():
-
-    def __init__(self, mastodonInstance) -> None:
-        self.mastodonInstance = mastodonInstance
-        self.compilePattern = re.compile('<.*?>')
-        self.sentiTooter = SentiTooter()
-        self.localTimezone = pytz.timezone('Europe/Berlin')
-
-    def getLocalTimeline(self, minId=None):
-        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
-
-    def cleanhtml(self, raw_html):
-        cleantext = re.sub(self.compilePattern, '', raw_html)
-        cleantext = re.sub(r'http\S+', '', cleantext)
-        return cleantext
-
-    def buildTootsDataframe(self, minId=None):
-        toots = []
-        allTimelineResults = []
-        timelinePagination = self.getLocalTimeline(minId)
-
-        while timelinePagination:
-            allTimelineResults = allTimelineResults + timelinePagination
-            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
-        for i in allTimelineResults:
-            content = self.cleanhtml(i.content)
-            language = detect(content)
-            sentiment = self.sentiTooter.analyze(language, content)
-            toot = {
-                "sentiment": sentiment[0],
-                "model": sentiment[1],
-                "toot": content,
-                "datetime": i.created_at.astimezone(self.localTimezone),
-                "language": language,
-                "userName": i.account.display_name,
-                "userId": i.account.id,
-                "tootId": i.id
-            }
-            toots.append(toot)
-        toots.sort(key=lambda item:item.get('datetime'))
+from langdetect import detect
+import pytz
+import pandas as pd
+import re
+from SentiTooter import SentiTooter
+from pprint import pprint
+
+class TootCrawler():
+    """Fetches the local timeline and turns it into a sentiment-annotated DataFrame."""
+
+    def __init__(self, mastodonInstance) -> None:
+        self.mastodonInstance = mastodonInstance
+        # Non-greedy match of anything enclosed in angle brackets.
+        self.compilePattern = re.compile('<.*?>')
+        self.sentiTooter = SentiTooter()
+        self.localTimezone = pytz.timezone('Europe/Berlin')
+
+    def getLocalTimeline(self, minId=None):
+        """Request one page of the local timeline, passing ``minId`` on as ``min_id``."""
+        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
+
+    def cleanhtml(self, raw_html):
+        """Return ``raw_html`` with tags and everything from ``http`` up to the next whitespace removed."""
+        cleantext = re.sub(self.compilePattern, '', raw_html)
+        cleantext = re.sub(r'http\S+', '', cleantext)
+        return cleantext
+
+    def buildTootsDataframe(self, minId=None):
+        """Collect all timeline pages and build one record per toot.
+
+        Args:
+            minId: Passed to the first timeline request as ``min_id``.
+
+        Returns:
+            A DataFrame sorted by ``datetime`` with the columns
+            ``sentiment``, ``model``, ``toot``, ``datetime``, ``language``,
+            ``userName``, ``userId`` and ``tootId``; empty when no toots
+            were fetched.
+        """
+        # Imported locally so the exception type is in scope without
+        # changing the module's import block.
+        from langdetect import LangDetectException
+
+        toots = []
+        allTimelineResults = []
+        timelinePagination = self.getLocalTimeline(minId)
+
+        while timelinePagination:
+            # extend() grows the list in place instead of rebuilding it per page.
+            allTimelineResults.extend(timelinePagination)
+            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
+        for status in allTimelineResults:
+            content = self.cleanhtml(status.content)
+            try:
+                language = detect(content)
+            except LangDetectException:
+                # Raised for text without detectable features, e.g. a toot
+                # that is empty once tags and links are stripped. 'und'
+                # (undetermined) sends it to the analyzer's default branch
+                # instead of aborting the whole crawl.
+                language = 'und'
+            sentiment = self.sentiTooter.analyze(language, content)
+            toot = {
+                "sentiment": sentiment[0],
+                "model": sentiment[1],
+                "toot": content,
+                "datetime": status.created_at.astimezone(self.localTimezone),
+                "language": language,
+                "userName": status.account.display_name,
+                "userId": status.account.id,
+                "tootId": status.id
+            }
+            toots.append(toot)
+        toots.sort(key=lambda item:item.get('datetime'))
        return pd.DataFrame.from_records(toots)
--- a/requirements.txt
+++ b/requirements.txt
@ -1,8 +1,12 @@
-mastodon.py
-matplotlib
-pandas
-sqlalchemy
-vader-multi
-numpy
-pytz
-transformers
+mastodon.py
+matplotlib
+pandas
+sqlalchemy
+vader-multi
+langdetect
+numpy
+pytz
+transformers
+wheel
+germansentiment
+scipy