Merge branch 'main' into with_cites

This commit is contained in:
rnsrk 2023-03-15 13:25:41 +01:00
commit 09fd313a89
9 changed files with 124 additions and 118 deletions

23
.gitignore vendored
View file

@ -1,11 +1,12 @@
database.db database.db
plots plots
instance instance
__pycache__ __pycache__
hedonodon_clientcred.secret hedonodon_clientcred.secret
hedonodon_usercred.secret hedonodon_usercred.secret
.fleet .fleet
test.py test.py
.idea .idea
cardiffnlp cardiffnlp
venv venv
logs.txt

View file

@ -1,6 +1,6 @@
from DbSetup import engine, session, databaseUrl from DbSetup import connection, engine, session, databaseUrl
import pandas as pd import pandas as pd
from sqlalchemy import desc, select from sqlalchemy import desc, select, sql
from Tables import Toots from Tables import Toots
def calculateSentimentCount(): def calculateSentimentCount():
@ -11,8 +11,8 @@ def calculateSentimentCount():
HAVING datetime >= DATE("now","-1 day") HAVING datetime >= DATE("now","-1 day")
AND datetime < DATE("now")''' AND datetime < DATE("now")'''
return pd.read_sql( return pd.read_sql(
query, sql.text(query),
databaseUrl, connection,
parse_dates=["datetime"] parse_dates=["datetime"]
) )
@ -51,8 +51,8 @@ class CRUDManager():
print(f'Could not save data to {table}!') print(f'Could not save data to {table}!')
def loadFromDatabase(self, table:str, indexColumn=None): def loadFromDatabase(self, table:str, indexColumn=None):
return pd.read_sql_table(table, databaseUrl, index_col=indexColumn) return pd.read_sql_table(table, connection, index_col=indexColumn)
def getLastToot(self): def getLastToot(self):
stmt = select(Toots.tootId).order_by(desc('datetime')) stmt = select(Toots.tootId).order_by(desc('datetime'))
return session.scalars(stmt).first() return session.scalars(stmt).first()

View file

@ -1,11 +1,12 @@
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
databaseUrl = 'sqlite:///database.db' databaseUrl = 'sqlite:///database.db'
engine = create_engine(databaseUrl, future=True) engine = create_engine(databaseUrl, future=True)
session = Session(engine) connection = engine.connect()
Base = declarative_base() session = Session(engine)
Base = declarative_base()
def init_db():
Base.metadata.create_all(bind=engine) def init_db():
Base.metadata.create_all(bind=engine)

View file

@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
import matplotlib.dates as mdates import matplotlib.dates as mdates
from TootCrawler import TootCrawler from TootCrawler import TootCrawler
locale.setlocale(locale.LC_TIME, "en_EN.UTF-8") locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
init_db() init_db()
mastodonAccountManager = MastodonAccountManager() mastodonAccountManager = MastodonAccountManager()
@ -84,7 +84,7 @@ axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))
# Line chart. # Line chart.
lineChart = dataframe4LineChart.plot.line( lineChart = dataframe4LineChart.plot.line(
ax=axes[1], ax=axes[1],
title='Mean of all sentiments from max positive (1) to min negative (-1)' title='"Mean" of sentiments, calculated from nominal values, pos(1), neu (0), neg (-1)!'
) )
axes[1].grid(True) axes[1].grid(True)
axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)]) axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])

View file

@ -1,5 +1,5 @@
from mastodon import Mastodon from mastodon import Mastodon
class MastodonAccountManager(): class MastodonAccountManager():
def __init__(self): def __init__(self):
self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret') self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')

View file

@ -1,4 +1,4 @@
# Hedonodon # Hedonodon
I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds. I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
More Documentation coming soon! More Documentation coming soon!

View file

@ -1,32 +1,32 @@
from DbSetup import Base from DbSetup import Base
from sqlalchemy import Column, Date, Integer, Float, String from sqlalchemy import Column, Date, Integer, Float, String
class Toots(Base): class Toots(Base):
__tablename__ = 'Toots' __tablename__ = 'Toots'
__table_args__ = {'extend_existing': True} __table_args__ = {'extend_existing': True}
index = Column(Integer, primary_key=True) index = Column(Integer, primary_key=True)
model = Column(String(30)) model = Column(String(30))
datetime = Column(Date) datetime = Column(Date)
language = Column(String(3)) language = Column(String(3))
sentiment = Column(String(8)) sentiment = Column(String(8))
tootId = Column(String(255)) tootId = Column(String(255))
toot = Column(String(600)) toot = Column(String(600))
userName = Column(String(255)) userName = Column(String(255))
userId = Column(String(255)) userId = Column(String(255))
class SentimentCounts(Base): class SentimentCounts(Base):
__tablename__ = 'SentimentCounts' __tablename__ = 'SentimentCounts'
__table_args__ = {'extend_existing': True} __table_args__ = {'extend_existing': True}
index = Column(Integer, primary_key=True) index = Column(Integer, primary_key=True)
sentimentCount = Column(Integer) sentimentCount = Column(Integer)
date = Column(Date, primary_key=True) date = Column(Date, primary_key=True)
sentiment = Column(String(8)) sentiment = Column(String(8))
class SentimentMeans(Base): class SentimentMeans(Base):
__tablename__ = 'SentimentMeans' __tablename__ = 'SentimentMeans'
__table_args__ = {'extend_existing': True} __table_args__ = {'extend_existing': True}
index = Column(Integer, primary_key=True) index = Column(Integer, primary_key=True)
date = Column(Date, primary_key=True) date = Column(Date, primary_key=True)
SentimentsMean = Column(Float) SentimentsMean = Column(Float)

View file

@ -1,48 +1,51 @@
from langdetect import detect from langdetect import detect
import pytz import pytz
import pandas as pd import pandas as pd
import re import re
from SentiTooter import SentiTooter from SentiTooter import SentiTooter
from pprint import pprint from pprint import pprint
class TootCrawler(): class TootCrawler():
def __init__(self, mastodonInstance) -> None: def __init__(self, mastodonInstance) -> None:
self.mastodonInstance = mastodonInstance self.mastodonInstance = mastodonInstance
self.compilePattern = re.compile('<.*?>') self.compilePattern = re.compile('<.*?>')
self.sentiTooter = SentiTooter() self.sentiTooter = SentiTooter()
self.localTimezone = pytz.timezone('Europe/Berlin') self.localTimezone = pytz.timezone('Europe/Berlin')
def getLocalTimeline(self, minId=None): def getLocalTimeline(self, minId=None):
return self.mastodonInstance.timeline_local(min_id=minId, limit=500) return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
def cleanhtml(self, raw_html): def cleanhtml(self, raw_html):
cleantext = re.sub(self.compilePattern, '', raw_html) cleantext = re.sub(self.compilePattern, '', raw_html)
cleantext = re.sub(r'http\S+', '', cleantext) cleantext = re.sub(r'http\S+', '', cleantext)
return cleantext return cleantext
def buildTootsDataframe(self, minId=None): def buildTootsDataframe(self, minId=None):
toots = [] toots = []
allTimelineResults = [] allTimelineResults = []
timelinePagination = self.getLocalTimeline(minId) timelinePagination = self.getLocalTimeline(minId)
while timelinePagination: while timelinePagination:
allTimelineResults = allTimelineResults + timelinePagination allTimelineResults = allTimelineResults + timelinePagination
timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination) timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
for i in allTimelineResults: for i in allTimelineResults:
content = self.cleanhtml(i.content) content = self.cleanhtml(i.content)
language = detect(content) try:
sentiment = self.sentiTooter.analyze(language, content) language = detect(content)
toot = { except:
"sentiment": sentiment[0], language = None
"model": sentiment[1], sentiment = self.sentiTooter.analyze(language, content)
"toot": content, toot = {
"datetime": i.created_at.astimezone(self.localTimezone), "sentiment": sentiment[0],
"language": language, "model": sentiment[1],
"userName": i.account.display_name, "toot": content,
"userId": i.account.id, "datetime": i.created_at.astimezone(self.localTimezone),
"tootId": i.id "language": language,
} "userName": i.account.display_name,
toots.append(toot) "userId": i.account.id,
toots.sort(key=lambda item:item.get('datetime')) "tootId": i.id
}
toots.append(toot)
toots.sort(key=lambda item:item.get('datetime'))
return pd.DataFrame.from_records(toots) return pd.DataFrame.from_records(toots)

View file

@ -3,9 +3,10 @@ matplotlib
pandas pandas
sqlalchemy sqlalchemy
vader-multi vader-multi
langdetect
numpy numpy
pytz pytz
transformers transformers
langdetect wheel
germansentiment germansentiment
scipy scipy