From 0b13673bcce081a279852b122eec2fc319f3bc00 Mon Sep 17 00:00:00 2001
From: rnsrk
Date: Tue, 3 Jan 2023 00:40:59 +0100
Subject: [PATCH] first commit

---
 .gitignore                |  6 +++++
 CRUDManager.py            | 36 +++++++++++++++++++++++++
 DbSetup.py                | 11 ++++++++
 Main.py                   | 57 +++++++++++++++++++++++++++++++++++++++
 MastodonAccountManager.py |  5 ++++
 README.md                 |  4 +++
 SentiTooter.py            | 19 +++++++++++++
 Tables.py                 | 33 +++++++++++++++++++++++
 TootCrawler.py            | 43 +++++++++++++++++++++++++++++
 requirements.txt          |  6 +++++
 10 files changed, 220 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 CRUDManager.py
 create mode 100644 DbSetup.py
 create mode 100644 Main.py
 create mode 100644 MastodonAccountManager.py
 create mode 100644 README.md
 create mode 100644 SentiTooter.py
 create mode 100644 Tables.py
 create mode 100644 TootCrawler.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2d8a3a2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+database.db
+plots
+instance
+__pycache__
+hedonodon_clientcred.secret
+hedonodon_usercred.secret
\ No newline at end of file
diff --git a/CRUDManager.py b/CRUDManager.py
new file mode 100644
index 0000000..4211035
--- /dev/null
+++ b/CRUDManager.py
@@ -0,0 +1,36 @@
+from DbSetup import engine, session, databaseUrl
+import pandas as pd
+from sqlalchemy import desc, select
+from Tables import Toots
+
+class CRUDManager():
+
+    def saveToDatabase(self, dataframe, table:str, useIndex=False):
+        try:
+            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
+        except:
+            print(f'Could not save data to {table}!')
+
+    def loadFromDatabase(self, table:str, indexColumn=None):
+        return pd.read_sql_table(table, databaseUrl, index_col=indexColumn)
+
+    def getLastToot(self):
+        stmt = select(Toots.tootId).order_by(desc('datetime'))
+        return session.scalars(stmt).first()
+
+    def calculateAggregates(self, column, aggregate='Count'):
+        if (aggregate=='Count'):
+            addGroup = f', {column} '
+        else:
+            addGroup = ''
+        query = f'''SELECT DATE(datetime) as date {addGroup}, {aggregate}({column}) as {column}{aggregate}
+            FROM Toots
+            GROUP BY DATE(datetime)''' \
+            + addGroup \
+            + '''HAVING datetime >= DATE("now","-1 day")
+            AND datetime < DATE("now")'''
+        return pd.read_sql(
+            query,
+            databaseUrl,
+            parse_dates=["datetime"]
+        )
diff --git a/DbSetup.py b/DbSetup.py
new file mode 100644
index 0000000..1898556
--- /dev/null
+++ b/DbSetup.py
@@ -0,0 +1,11 @@
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+from sqlalchemy.ext.declarative import declarative_base
+
+databaseUrl = 'sqlite:///database.db'
+engine = create_engine(databaseUrl, future=True)
+session = Session(engine)
+Base = declarative_base()
+
+def init_db():
+    Base.metadata.create_all(bind=engine)
diff --git a/Main.py b/Main.py
new file mode 100644
index 0000000..2dcdd3a
--- /dev/null
+++ b/Main.py
@@ -0,0 +1,57 @@
+from CRUDManager import CRUDManager
+from datetime import datetime
+from DbSetup import init_db
+import locale
+from MastodonAccountManager import MastodonAccountManager
+import matplotlib.pyplot as plt
+from TootCrawler import TootCrawler
+from sqlalchemy.sql import desc, select
+
+locale.setlocale(locale.LC_TIME, "de_DE.UTF-8")
+init_db()
+
+mastodonAccountManager = MastodonAccountManager()
+mastodonInstance = mastodonAccountManager.instance
+"""
+mastodonInstance.log_in(
+    'USER-EMAIL',
+    'PW',
+    to_file = 'hedonodon_usercred.secret'
+)
+"""
+
+tootCrawler = TootCrawler(mastodonInstance)
+crudManager = CRUDManager()
+
+lastTootId = crudManager.getLastToot()
+tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
+sentimentsYesterday = crudManager.calculateAggregates('sentiment', 'Count')
+compoundsYesterday = crudManager.calculateAggregates('compound', 'Avg')
+if not tootsDataframe.empty:
+    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
+    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='Sentiments', useIndex=True)
+    crudManager.saveToDatabase(dataframe=compoundsYesterday, table='Compounds', useIndex=True)
+    #print(sentimentsYesterday, 'sentimentsYesterday')
+    #print(compoundsYesterday, 'sentimentsYesterday')
+else:
+    print('Nothing changed since last database insert!')
+
+TodayDate= datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d').strftime('%d.%m.%Y')
+dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
+dataframe4LineChart = crudManager.loadFromDatabase('Compounds', 'date').drop('index', axis=1)
+
+fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10,10))
+
+pieChartlabels = dataframe4PieChart.index.to_numpy()
+pieChart = dataframe4PieChart.plot.pie(ax=axes[0], y='sentimentCount', ylabel="", labels=dataframe4PieChart['sentimentCount'], title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org', colors = ['red', 'grey', 'green'])
+chartBox = axes[0].get_position()
+axes[0].set_position([chartBox.x0,chartBox.y0-0.2,chartBox.width,chartBox.height])
+axes[0].legend(pieChartlabels,loc='upper right', bbox_to_anchor=(1.3, 0.9))
+lineChart = dataframe4LineChart.plot.line(ax=axes[1], title='Compounds from max positive (1) to min neg (-1)')
+axes[1].set_ylim([-1, 1])
+
+plotFileUrl = f'./plots/{TodayDate}.png'
+plt.savefig(plotFileUrl)
+
+media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the compounds up to {TodayDate}.")
+mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
\ No newline at end of file
diff --git a/MastodonAccountManager.py b/MastodonAccountManager.py
new file mode 100644
index 0000000..6542659
--- /dev/null
+++ b/MastodonAccountManager.py
@@ -0,0 +1,5 @@
+from mastodon import Mastodon
+
+class MastodonAccountManager():
+    def __init__(self):
+        self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fdbe2f7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+# Hedonodon
+I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
+
+More Documentation coming soon!
\ No newline at end of file
diff --git a/SentiTooter.py b/SentiTooter.py
new file mode 100644
index 0000000..b745c2f
--- /dev/null
+++ b/SentiTooter.py
@@ -0,0 +1,19 @@
+from math import sqrt
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+import numpy as np
+
+class SentiTooter():
+    """"""
+    def __init__(self):
+        self.sia = SentimentIntensityAnalyzer()
+
+
+    def analyze(self, toot):
+        compound = self.sia.polarity_scores(toot.content)['compound']
+        if (compound > (1/3)):
+            return ['positive', compound]
+        elif (compound < (-1/3)):
+            return ['negative', compound]
+        else:
+            return ['neutral', compound]
+
diff --git a/Tables.py b/Tables.py
new file mode 100644
index 0000000..4ae477f
--- /dev/null
+++ b/Tables.py
@@ -0,0 +1,33 @@
+from DbSetup import Base
+from sqlalchemy import Column, Date, Integer, Float, String
+
+class Toots(Base):
+    __tablename__ = 'Toots'
+    __table_args__ = {'extend_existing': True}
+    index = Column(Integer, primary_key=True)
+    compound = Column(Float)
+    datetime = Column(Date)
+    language = Column(String(3))
+    sentiment = Column(String(8))
+    tootId = Column(String(255))
+    toot = Column(String(600))
+    userName = Column(String(255))
+    userId = Column(String(255))
+
+
+
+class Sentiments(Base):
+    __tablename__ = 'Sentiments'
+    __table_args__ = {'extend_existing': True}
+    index = Column(Integer, primary_key=True)
+    sentimentCount = Column(Integer)
+    date = Column(Date, primary_key = True)
+    sentiment = Column(String(8))
+
+
+class Compounds(Base):
+    __tablename__ = 'Compounds'
+    __table_args__ = {'extend_existing': True}
+    index = Column(Integer, primary_key=True)
+    date = Column(Date, primary_key = True)
+    compoundAvg = Column(Float)
\ No newline at end of file
diff --git a/TootCrawler.py b/TootCrawler.py
new file mode 100644
index 0000000..47b87f0
--- /dev/null
+++ b/TootCrawler.py
@@ -0,0 +1,43 @@
+import pytz
+import pandas as pd
+import re
+from SentiTooter import SentiTooter
+from pprint import pprint
+
+
+class TootCrawler():
+
+    def __init__(self, mastodonInstance) -> None:
+        self.mastodonInstance = mastodonInstance
+        self.compilePattern = re.compile('<.*?>')
+        self.sentiTooter = SentiTooter()
+        self.localTimezone = pytz.timezone('Europe/Berlin')
+
+    def getLocalTimeline(self, sinceId=None):
+        return self.mastodonInstance.timeline_local(since_id=sinceId)
+
+    def cleanhtml(self, raw_html):
+        cleantext = re.sub(self.compilePattern, '', raw_html)
+        cleantext = re.sub(r'http\S+', '', cleantext)
+        return cleantext
+
+    def buildTootsDataframe(self, sinceId=None):
+        toots = []
+
+        for i in self.getLocalTimeline(sinceId):
+            content = self.cleanhtml(i.content)
+            sentiment = self.sentiTooter.analyze(i)
+            toots.append(
+                {
+                    "sentiment": sentiment[0],
+                    "compound": sentiment[1],
+                    "userName": i.account.display_name,
+                    "userId": i.account.id,
+                    "toot": content,
+                    "datetime": i.created_at.astimezone(self.localTimezone),
+                    "language": i.language,
+                    "tootId": i.id
+                }
+            )
+        toots.sort(key=lambda item:item.get('datetime'))
+        return pd.DataFrame.from_records(toots)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4880a8c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+mastodon.py
+matplotlib
+pandas
+sqlalchemy
+vader-multi
+numpy
\ No newline at end of file