From 2b98565444e6a8e9ead4389941f7ec391566b5e2 Mon Sep 17 00:00:00 2001 From: Robert Nasarek Date: Fri, 27 Jan 2023 21:08:25 +0100 Subject: [PATCH] made hedonodon server ready --- .gitignore | 23 ++--- CRUDManager.py | 96 +++++++++---------- DbSetup.py | 23 ++--- Main.py | 196 +++++++++++++++++++------------------- MastodonAccountManager.py | 10 +- README.md | 6 +- SentiTooter.py | 148 ++++++++++++++-------------- Tables.py | 62 ++++++------ TootCrawler.py | 94 +++++++++--------- requirements.txt | 20 ++-- 10 files changed, 342 insertions(+), 336 deletions(-) diff --git a/.gitignore b/.gitignore index d946327..613ca0d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,12 @@ -database.db -plots -instance -__pycache__ -hedonodon_clientcred.secret -hedonodon_usercred.secret -.fleet -test.py -.idea -cardiffnlp -venv \ No newline at end of file +database.db +plots +instance +__pycache__ +hedonodon_clientcred.secret +hedonodon_usercred.secret +.fleet +test.py +.idea +cardiffnlp +venv +logs.txt diff --git a/CRUDManager.py b/CRUDManager.py index 0226fbf..4f05098 100644 --- a/CRUDManager.py +++ b/CRUDManager.py @@ -1,48 +1,48 @@ -from DbSetup import engine, session, databaseUrl -import pandas as pd -from sqlalchemy import desc, select -from Tables import Toots - - -def calculateSentimentCount(): - query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount - FROM Toots - GROUP BY DATE(datetime), - sentiment - HAVING datetime >= DATE("now","-1 day") - AND datetime < DATE("now")''' - return pd.read_sql( - query, - databaseUrl, - parse_dates=["datetime"] - ) - -def calculateSentimentMean(dataframe): - negativeSentimentSum = dataframe[dataframe['sentiment'] == 'negative']['sentimentCount'].sum() * -1 - positiveSentimentSum = dataframe[dataframe['sentiment'] == 'positive']['sentimentCount'].sum() - sentimentSum = dataframe['sentimentCount'].sum() - sentimentMean = (negativeSentimentSum + positiveSentimentSum) / sentimentSum - sentimentDate = dataframe.loc[0]['date'] - return pd.DataFrame.from_records( - [ - { - 'date': sentimentDate, - 'sentimentsMean': sentimentMean - } - ] - ) - -class CRUDManager(): - - def saveToDatabase(self, dataframe, table:str, useIndex=False): - try: - dataframe.to_sql(table, engine, index=useIndex, if_exists="append") - except: - print(f'Could not save data to {table}!') - - def loadFromDatabase(self, table:str, indexColumn=None): - return pd.read_sql_table(table, databaseUrl, index_col=indexColumn) - - def getLastToot(self): - stmt = select(Toots.tootId).order_by(desc('datetime')) - return session.scalars(stmt).first() \ No newline at end of file +from DbSetup import connection, engine, session, databaseUrl +import pandas as pd +from sqlalchemy import desc, select, sql +from Tables import Toots + + +def calculateSentimentCount(): + query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount + FROM Toots + GROUP BY DATE(datetime), + sentiment + HAVING datetime >= DATE("now","-1 day") + AND datetime < DATE("now")''' + return pd.read_sql( + sql.text(query), + connection, + parse_dates=["datetime"] + ) + +def calculateSentimentMean(dataframe): + negativeSentimentSum = dataframe[dataframe['sentiment'] == 'negative']['sentimentCount'].sum() * -1 + positiveSentimentSum = dataframe[dataframe['sentiment'] == 'positive']['sentimentCount'].sum() + sentimentSum = dataframe['sentimentCount'].sum() + sentimentMean = (negativeSentimentSum + positiveSentimentSum) / sentimentSum + sentimentDate = dataframe.loc[0]['date'] + return pd.DataFrame.from_records( + [ + { + 'date': sentimentDate, + 'sentimentsMean': sentimentMean + } + ] + ) + +class CRUDManager(): + + def saveToDatabase(self, dataframe, table:str, useIndex=False): + try: + dataframe.to_sql(table, engine, index=useIndex, if_exists="append") + except: + print(f'Could not save data to {table}!') + + def loadFromDatabase(self, table:str, indexColumn=None): + return pd.read_sql_table(table, connection, index_col=indexColumn) + + def getLastToot(self): + stmt = select(Toots.tootId).order_by(desc('datetime')) + return session.scalars(stmt).first() diff --git a/DbSetup.py b/DbSetup.py index 1898556..c787928 100644 --- a/DbSetup.py +++ b/DbSetup.py @@ -1,11 +1,12 @@ -from sqlalchemy import create_engine -from sqlalchemy.orm import Session -from sqlalchemy.ext.declarative import declarative_base - -databaseUrl = 'sqlite:///database.db' -engine = create_engine(databaseUrl, future=True) -session = Session(engine) -Base = declarative_base() - -def init_db(): - Base.metadata.create_all(bind=engine) +from sqlalchemy import create_engine +from sqlalchemy.orm import Session +from sqlalchemy.ext.declarative import declarative_base + +databaseUrl = 'sqlite:///database.db' +engine = create_engine(databaseUrl, future=True) +connection = engine.connect() +session = Session(engine) +Base = declarative_base() + +def init_db(): + Base.metadata.create_all(bind=engine) diff --git a/Main.py b/Main.py index b80b3a6..2b6768a 100644 --- a/Main.py +++ b/Main.py @@ -1,98 +1,98 @@ -from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean -from datetime import datetime, date -from DbSetup import init_db -import locale -from MastodonAccountManager import MastodonAccountManager -import matplotlib.pyplot as plt -import matplotlib.dates as mdates -from TootCrawler import TootCrawler - -locale.setlocale(locale.LC_TIME, "en_EN.UTF-8") -init_db() - -mastodonAccountManager = MastodonAccountManager() -mastodonInstance = mastodonAccountManager.instance -""" -mastodonInstance.log_in( - 'USER-EMAIL', - 'PW', - to_file = 'hedonodon_usercred.secret' -) -""" - -tootCrawler = TootCrawler(mastodonInstance) -crudManager = CRUDManager() - -lastTootId = crudManager.getLastToot() -tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId) - -if not tootsDataframe.empty: - crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False) -else: - print('Nothing changed since last database insert!') - -sentimentsYesterday = calculateSentimentCount() -sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday) - -if not tootsDataframe.empty: - crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True) - crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True) -else: - print('Nothing changed since last database insert!') - -colormap = { - 'negative': '#ff9999', - 'neutral': '#ffcc99', - "positive": '#99ff99' -} - -todaysColors = [] -for sentiment in sentimentsYesterday['sentiment'].to_numpy(): - todaysColors.append(colormap[sentiment]) - - - -TodayDate = datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d').strftime('%d.%m.%Y') -dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment') -dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1) - -fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10)) - -# Pie chart. -pieChartlabels = dataframe4PieChart.index.to_numpy() -pieChart = dataframe4PieChart.plot.pie( - ax=axes[0], - y='sentimentCount', - ylabel="", - labels=dataframe4PieChart['sentimentCount'], - title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org', - colors=todaysColors, - wedgeprops=dict(linewidth=3, edgecolor='w'), - startangle=90 -) - -axes[0].axis('equal') -centre_circle = plt.Circle((0, 0), 0.6, fc='white') -axes[0].add_patch(centre_circle) -chartBox = axes[0].get_position() -axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9)) - -# Line chart. -lineChart = dataframe4LineChart.plot.line( - ax=axes[1], - title='Mean of all sentiments from max positive (1) to min negative (-1)' -) -axes[1].grid(True) -axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)]) -axes[1].set_ylim([-1, 1]) -axes[1].xaxis.set_major_locator(mdates.MonthLocator()) -axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15)) -axes[1].xaxis.set_major_formatter(plt.NullFormatter()) -axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h')) -axes[1].tick_params(which='minor', length=0) -plotFileUrl = f'./plots/{TodayDate}.png' -plt.savefig(plotFileUrl) - -media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.") -mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en') - +from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean +from datetime import datetime, date +from DbSetup import init_db +import locale +from MastodonAccountManager import MastodonAccountManager +import matplotlib.pyplot as plt +import matplotlib.dates as mdates +from TootCrawler import TootCrawler + +locale.setlocale(locale.LC_TIME, "en_US.UTF-8") +init_db() + +mastodonAccountManager = MastodonAccountManager() +mastodonInstance = mastodonAccountManager.instance +""" +mastodonInstance.log_in( + 'USER-EMAIL', + 'PW', + to_file = 'hedonodon_usercred.secret' +) +""" + +tootCrawler = TootCrawler(mastodonInstance) +crudManager = CRUDManager() + +lastTootId = crudManager.getLastToot() +tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId) + +if not tootsDataframe.empty: + crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False) +else: + print('Nothing changed since last database insert!') + +sentimentsYesterday = calculateSentimentCount() +sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday) + +if not tootsDataframe.empty: + crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True) + crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True) +else: + print('Nothing changed since last database insert!') + +colormap = { + 'negative': '#ff9999', + 'neutral': '#ffcc99', + "positive": '#99ff99' +} + +todaysColors = [] +for sentiment in sentimentsYesterday['sentiment'].to_numpy(): + todaysColors.append(colormap[sentiment]) + + + +TodayDate = datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d').strftime('%d.%m.%Y') +dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment') +dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1) + +fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10)) + +# Pie chart. +pieChartlabels = dataframe4PieChart.index.to_numpy() +pieChart = dataframe4PieChart.plot.pie( + ax=axes[0], + y='sentimentCount', + ylabel="", + labels=dataframe4PieChart['sentimentCount'], + title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org', + colors=todaysColors, + wedgeprops=dict(linewidth=3, edgecolor='w'), + startangle=90 +) + +axes[0].axis('equal') +centre_circle = plt.Circle((0, 0), 0.6, fc='white') +axes[0].add_patch(centre_circle) +chartBox = axes[0].get_position() +axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9)) + +# Line chart. +lineChart = dataframe4LineChart.plot.line( + ax=axes[1], + title='Mean of all sentiments from max positive (1) to min negative (-1)' +) +axes[1].grid(True) +axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)]) +axes[1].set_ylim([-1, 1]) +axes[1].xaxis.set_major_locator(mdates.MonthLocator()) +axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15)) +axes[1].xaxis.set_major_formatter(plt.NullFormatter()) +axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h')) +axes[1].tick_params(which='minor', length=0) +plotFileUrl = f'./plots/{TodayDate}.png' +plt.savefig(plotFileUrl) + +media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.") +mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en') + diff --git a/MastodonAccountManager.py b/MastodonAccountManager.py index 9c51e54..68d62bb 100644 --- a/MastodonAccountManager.py +++ b/MastodonAccountManager.py @@ -1,5 +1,5 @@ -from mastodon import Mastodon - -class MastodonAccountManager(): - def __init__(self): - self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret') +from mastodon import Mastodon + +class MastodonAccountManager(): + def __init__(self): + self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret') diff --git a/README.md b/README.md index fdbe2f7..5440dc1 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Hedonodon -I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds. - +# Hedonodon +I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds. + More Documentation coming soon! \ No newline at end of file diff --git a/SentiTooter.py b/SentiTooter.py index 26b7a47..00899c2 100644 --- a/SentiTooter.py +++ b/SentiTooter.py @@ -1,74 +1,74 @@ -from germansentiment import SentimentModel -import numpy as np -from scipy.special import softmax -from transformers import AutoModelForSequenceClassification -from transformers import AutoTokenizer -from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer - - -# Preprocess text (username and link placeholders) -def preprocess(text): - new_text = [] - - for t in text.split(" "): - t = '@user' if t.startswith('@') and len(t) > 1 else t - t = 'http' if t.startswith('http') else t - new_text.append(t) - return " ".join(new_text) - - -class SentiTooter: - """""" - - def __init__(self): - self.deModel = SentimentModel() - self.enModelType = f"cardiffnlp/twitter-roberta-base-sentiment" - self.enModel, self.enTokenizer = self.initModel() - # https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt - self.labels = ['negative', 'neutral', 'positive'] - self.sia = SentimentIntensityAnalyzer() - - def analyze(self, language, content): - match language: - case 'de': - sentiment = self.deModel.predict_sentiment([content]) - sentiment.append('germanSentiment') - return sentiment - case 'en': - text = preprocess(content) - encoded_input = self.enTokenizer(text, return_tensors='pt') - output = self.enModel(**encoded_input) - scores = output[0][0].detach().numpy() - scores = softmax(scores) - sentimentIndexWithMaxScore = np.argmax(scores) - sentimentLabel = self.labels[sentimentIndexWithMaxScore] - sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment'] - return sentiment - case _: - compound = self.sia.polarity_scores(content)['compound'] - if compound > (1 / 3): - return ['positive', 'vaderSentiment'] - elif compound < (-1 / 3): - return ['negative', 'vaderSentiment'] - else: - return ['neutral', 'vaderSentiment'] - - - - def initModel(self): - # PT - tokenizer = AutoTokenizer.from_pretrained(self.enModelType) - tokenizer.save_pretrained(self.enModelType) - model = AutoModelForSequenceClassification.from_pretrained(self.enModelType) - model.save_pretrained(self.enModelType) - return model, tokenizer - - # # TF - # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL) - # model.save_pretrained(MODEL) - - # text = "Good night 😊" - # encoded_input = tokenizer(text, return_tensors='tf') - # output = model(encoded_input) - # scores = output[0][0].numpy() - # scores = softmax(scores) +from germansentiment import SentimentModel +import numpy as np +from scipy.special import softmax +from transformers import AutoModelForSequenceClassification +from transformers import AutoTokenizer +from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer + + +# Preprocess text (username and link placeholders) +def preprocess(text): + new_text = [] + + for t in text.split(" "): + t = '@user' if t.startswith('@') and len(t) > 1 else t + t = 'http' if t.startswith('http') else t + new_text.append(t) + return " ".join(new_text) + + +class SentiTooter: + """""" + + def __init__(self): + self.deModel = SentimentModel() + self.enModelType = f"cardiffnlp/twitter-roberta-base-sentiment" + self.enModel, self.enTokenizer = self.initModel() + # https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt + self.labels = ['negative', 'neutral', 'positive'] + self.sia = SentimentIntensityAnalyzer() + + def analyze(self, language, content): + match language: + case 'de': + sentiment = self.deModel.predict_sentiment([content]) + sentiment.append('germanSentiment') + return sentiment + case 'en': + text = preprocess(content) + encoded_input = self.enTokenizer(text, return_tensors='pt') + output = self.enModel(**encoded_input) + scores = output[0][0].detach().numpy() + scores = softmax(scores) + sentimentIndexWithMaxScore = np.argmax(scores) + sentimentLabel = self.labels[sentimentIndexWithMaxScore] + sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment'] + return sentiment + case _: + compound = self.sia.polarity_scores(content)['compound'] + if compound > (1 / 3): + return ['positive', 'vaderSentiment'] + elif compound < (-1 / 3): + return ['negative', 'vaderSentiment'] + else: + return ['neutral', 'vaderSentiment'] + + + + def initModel(self): + # PT + tokenizer = AutoTokenizer.from_pretrained(self.enModelType) + tokenizer.save_pretrained(self.enModelType) + model = AutoModelForSequenceClassification.from_pretrained(self.enModelType) + model.save_pretrained(self.enModelType) + return model, tokenizer + + # # TF + # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL) + # model.save_pretrained(MODEL) + + # text = "Good night 😊" + # encoded_input = tokenizer(text, return_tensors='tf') + # output = model(encoded_input) + # scores = output[0][0].numpy() + # scores = softmax(scores) diff --git a/Tables.py b/Tables.py index 78aa412..c64178a 100644 --- a/Tables.py +++ b/Tables.py @@ -1,32 +1,32 @@ -from DbSetup import Base -from sqlalchemy import Column, Date, Integer, Float, String - -class Toots(Base): - __tablename__ = 'Toots' - __table_args__ = {'extend_existing': True} - index = Column(Integer, primary_key=True) - model = Column(String(30)) - datetime = Column(Date) - language = Column(String(3)) - sentiment = Column(String(8)) - tootId = Column(String(255)) - toot = Column(String(600)) - userName = Column(String(255)) - userId = Column(String(255)) - - - -class SentimentCounts(Base): - __tablename__ = 'SentimentCounts' - __table_args__ = {'extend_existing': True} - index = Column(Integer, primary_key=True) - sentimentCount = Column(Integer) - date = Column(Date, primary_key=True) - sentiment = Column(String(8)) - -class SentimentMeans(Base): - __tablename__ = 'SentimentMeans' - __table_args__ = {'extend_existing': True} - index = Column(Integer, primary_key=True) - date = Column(Date, primary_key=True) +from DbSetup import Base +from sqlalchemy import Column, Date, Integer, Float, String + +class Toots(Base): + __tablename__ = 'Toots' + __table_args__ = {'extend_existing': True} + index = Column(Integer, primary_key=True) + model = Column(String(30)) + datetime = Column(Date) + language = Column(String(3)) + sentiment = Column(String(8)) + tootId = Column(String(255)) + toot = Column(String(600)) + userName = Column(String(255)) + userId = Column(String(255)) + + + +class SentimentCounts(Base): + __tablename__ = 'SentimentCounts' + __table_args__ = {'extend_existing': True} + index = Column(Integer, primary_key=True) + sentimentCount = Column(Integer) + date = Column(Date, primary_key=True) + sentiment = Column(String(8)) + +class SentimentMeans(Base): + __tablename__ = 'SentimentMeans' + __table_args__ = {'extend_existing': True} + index = Column(Integer, primary_key=True) + date = Column(Date, primary_key=True) SentimentsMean = Column(Float) \ No newline at end of file diff --git a/TootCrawler.py b/TootCrawler.py index 1b081c2..fa131bf 100644 --- a/TootCrawler.py +++ b/TootCrawler.py @@ -1,48 +1,48 @@ -from langdetect import detect -import pytz -import pandas as pd -import re -from SentiTooter import SentiTooter -from pprint import pprint - -class TootCrawler(): - - def __init__(self, mastodonInstance) -> None: - self.mastodonInstance = mastodonInstance - self.compilePattern = re.compile('<.*?>') - self.sentiTooter = SentiTooter() - self.localTimezone = pytz.timezone('Europe/Berlin') - - def getLocalTimeline(self, minId=None): - return self.mastodonInstance.timeline_local(min_id=minId, limit=500) - - def cleanhtml(self, raw_html): - cleantext = re.sub(self.compilePattern, '', raw_html) - cleantext = re.sub(r'http\S+', '', cleantext) - return cleantext - - def buildTootsDataframe(self, minId=None): - toots = [] - allTimelineResults = [] - timelinePagination = self.getLocalTimeline(minId) - - while timelinePagination: - allTimelineResults = allTimelineResults + timelinePagination - timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination) - for i in allTimelineResults: - content = self.cleanhtml(i.content) - language = detect(content) - sentiment = self.sentiTooter.analyze(language, content) - toot = { - "sentiment": sentiment[0], - "model": sentiment[1], - "toot": content, - "datetime": i.created_at.astimezone(self.localTimezone), - "language": language, - "userName": i.account.display_name, - "userId": i.account.id, - "tootId": i.id - } - toots.append(toot) - toots.sort(key=lambda item:item.get('datetime')) +from langdetect import detect +import pytz +import pandas as pd +import re +from SentiTooter import SentiTooter +from pprint import pprint + +class TootCrawler(): + + def __init__(self, mastodonInstance) -> None: + self.mastodonInstance = mastodonInstance + self.compilePattern = re.compile('<.*?>') + self.sentiTooter = SentiTooter() + self.localTimezone = pytz.timezone('Europe/Berlin') + + def getLocalTimeline(self, minId=None): + return self.mastodonInstance.timeline_local(min_id=minId, limit=500) + + def cleanhtml(self, raw_html): + cleantext = re.sub(self.compilePattern, '', raw_html) + cleantext = re.sub(r'http\S+', '', cleantext) + return cleantext + + def buildTootsDataframe(self, minId=None): + toots = [] + allTimelineResults = [] + timelinePagination = self.getLocalTimeline(minId) + + while timelinePagination: + allTimelineResults = allTimelineResults + timelinePagination + timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination) + for i in allTimelineResults: + content = self.cleanhtml(i.content) + language = detect(content) + sentiment = self.sentiTooter.analyze(language, content) + toot = { + "sentiment": sentiment[0], + "model": sentiment[1], + "toot": content, + "datetime": i.created_at.astimezone(self.localTimezone), + "language": language, + "userName": i.account.display_name, + "userId": i.account.id, + "tootId": i.id + } + toots.append(toot) + toots.sort(key=lambda item:item.get('datetime')) return pd.DataFrame.from_records(toots) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 842b542..47f6d81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,12 @@ -mastodon.py -matplotlib -pandas -sqlalchemy -vader-multi -numpy -pytz -transformers \ No newline at end of file +mastodon.py +matplotlib +pandas +sqlalchemy +vader-multi +langdetect +numpy +pytz +transformers +wheel +germansentiment +scipy