made hedonodon server ready
This commit is contained in:
parent
52223192b4
commit
2b98565444
10 changed files with 342 additions and 336 deletions
23
.gitignore
vendored
23
.gitignore
vendored
|
|
@ -1,11 +1,12 @@
|
|||
database.db
|
||||
plots
|
||||
instance
|
||||
__pycache__
|
||||
hedonodon_clientcred.secret
|
||||
hedonodon_usercred.secret
|
||||
.fleet
|
||||
test.py
|
||||
.idea
|
||||
cardiffnlp
|
||||
venv
|
||||
database.db
|
||||
plots
|
||||
instance
|
||||
__pycache__
|
||||
hedonodon_clientcred.secret
|
||||
hedonodon_usercred.secret
|
||||
.fleet
|
||||
test.py
|
||||
.idea
|
||||
cardiffnlp
|
||||
venv
|
||||
logs.txt
|
||||
|
|
|
|||
|
|
@ -1,48 +1,48 @@
|
|||
from DbSetup import engine, session, databaseUrl
|
||||
import pandas as pd
|
||||
from sqlalchemy import desc, select
|
||||
from Tables import Toots
|
||||
|
||||
|
||||
def calculateSentimentCount():
|
||||
query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
|
||||
FROM Toots
|
||||
GROUP BY DATE(datetime),
|
||||
sentiment
|
||||
HAVING datetime >= DATE("now","-1 day")
|
||||
AND datetime < DATE("now")'''
|
||||
return pd.read_sql(
|
||||
query,
|
||||
databaseUrl,
|
||||
parse_dates=["datetime"]
|
||||
)
|
||||
|
||||
def calculateSentimentMean(dataframe):
|
||||
negativeSentimentSum = dataframe[dataframe['sentiment'] == 'negative']['sentimentCount'].sum() * -1
|
||||
positiveSentimentSum = dataframe[dataframe['sentiment'] == 'positive']['sentimentCount'].sum()
|
||||
sentimentSum = dataframe['sentimentCount'].sum()
|
||||
sentimentMean = (negativeSentimentSum + positiveSentimentSum) / sentimentSum
|
||||
sentimentDate = dataframe.loc[0]['date']
|
||||
return pd.DataFrame.from_records(
|
||||
[
|
||||
{
|
||||
'date': sentimentDate,
|
||||
'sentimentsMean': sentimentMean
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
class CRUDManager():
|
||||
|
||||
def saveToDatabase(self, dataframe, table:str, useIndex=False):
|
||||
try:
|
||||
dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
|
||||
except:
|
||||
print(f'Could not save data to {table}!')
|
||||
|
||||
def loadFromDatabase(self, table:str, indexColumn=None):
|
||||
return pd.read_sql_table(table, databaseUrl, index_col=indexColumn)
|
||||
|
||||
def getLastToot(self):
|
||||
stmt = select(Toots.tootId).order_by(desc('datetime'))
|
||||
return session.scalars(stmt).first()
|
||||
from DbSetup import connection, engine, session, databaseUrl
|
||||
import pandas as pd
|
||||
from sqlalchemy import desc, select, sql
|
||||
from Tables import Toots
|
||||
|
||||
|
||||
def calculateSentimentCount():
    """Count the toots of the previous day per sentiment label.

    Reads the ``Toots`` table through the shared ``connection``.

    Returns:
        pandas.DataFrame: one row per (date, sentiment) pair with the columns
        ``date``, ``sentiment`` and ``sentimentCount``. ``date`` is left
        unparsed; callers convert it themselves.
    """
    # Row filtering belongs in WHERE: it runs before grouping instead of
    # aggregating the whole table first, and it does not rely on SQLite's
    # permissive handling of bare (non-aggregated) columns inside HAVING.
    # String literals use single quotes, the standard SQL form.
    # NOTE(review): DATE('now') is presumably evaluated in UTC by SQLite while
    # the crawler stores Europe/Berlin timestamps - confirm the day window.
    query = '''SELECT DATE(datetime) AS date, sentiment, COUNT(sentiment) AS sentimentCount
               FROM Toots
               WHERE datetime >= DATE('now', '-1 day')
                 AND datetime < DATE('now')
               GROUP BY DATE(datetime), sentiment'''
    return pd.read_sql(sql.text(query), connection)
|
||||
|
||||
def calculateSentimentMean(dataframe):
    """Collapse per-sentiment counts into one mean score between -1 and 1.

    Negative toots weigh -1, positive toots +1 and every other label 0; the
    weighted sum is divided by the total number of counted toots.

    Args:
        dataframe: frame with the columns ``date``, ``sentiment`` and
            ``sentimentCount`` (as produced by ``calculateSentimentCount``).

    Returns:
        pandas.DataFrame: a single row with the columns ``date`` (taken from
        the first input row) and ``sentimentsMean``. An empty input yields an
        empty frame with the same two columns instead of raising.
    """
    resultColumns = ['date', 'sentimentsMean']

    # Without rows there is neither a date to report nor a non-zero divisor.
    if dataframe.empty:
        return pd.DataFrame(columns=resultColumns)

    counts = dataframe['sentimentCount']
    negativeCount = counts[dataframe['sentiment'] == 'negative'].sum()
    positiveCount = counts[dataframe['sentiment'] == 'positive'].sum()
    sentimentMean = (positiveCount - negativeCount) / counts.sum()

    # Positional access: works no matter how the caller indexed the frame.
    sentimentDate = dataframe['date'].iloc[0]

    return pd.DataFrame.from_records(
        [{'date': sentimentDate, 'sentimentsMean': sentimentMean}],
        columns=resultColumns,
    )
|
||||
|
||||
class CRUDManager():
    """Small persistence helper built on pandas and the shared DbSetup objects."""

    def saveToDatabase(self, dataframe, table: str, useIndex=False):
        """Append ``dataframe`` to ``table`` on a best-effort basis.

        Args:
            dataframe: object offering ``to_sql`` (a pandas DataFrame).
            table: name of the target table.
            useIndex: whether the frame's index is written as a column.

        Failures are reported on stdout and not re-raised, so the calling
        script keeps running. Only ``Exception`` subclasses are swallowed;
        KeyboardInterrupt and SystemExit still propagate.
        """
        try:
            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
        except Exception as error:
            print(f'Could not save data to {table}!')
            # Surface the cause; the message alone gives nothing to debug with.
            print(f'Reason: {error!r}')

    def loadFromDatabase(self, table: str, indexColumn=None):
        """Read a whole table into a DataFrame.

        Args:
            table: name of the table to read.
            indexColumn: optional column to use as the frame's index.

        Returns:
            pandas.DataFrame: the table's rows.
        """
        return pd.read_sql_table(table, connection, index_col=indexColumn)

    def getLastToot(self):
        """Return the ``tootId`` of the row with the greatest ``datetime``.

        Returns:
            The id of the most recent stored toot, or ``None`` when the query
            yields no row.
        """
        stmt = select(Toots.tootId).order_by(desc('datetime'))
        return session.scalars(stmt).first()
|
||||
|
|
|
|||
23
DbSetup.py
23
DbSetup.py
|
|
@ -1,11 +1,12 @@
|
|||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
databaseUrl = 'sqlite:///database.db'
|
||||
engine = create_engine(databaseUrl, future=True)
|
||||
session = Session(engine)
|
||||
Base = declarative_base()
|
||||
|
||||
def init_db():
|
||||
Base.metadata.create_all(bind=engine)
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
# Location of the SQLite database file, relative to the working directory.
databaseUrl = 'sqlite:///database.db'

# One engine for the whole application; the objects below are all bound to it.
engine = create_engine(databaseUrl, future=True)

# Module-level connection opened at import time and shared by importers.
connection = engine.connect()

# Module-level ORM session bound to the same engine.
session = Session(bind=engine)

# Declarative base class that the table models inherit from.
Base = declarative_base()
|
||||
|
||||
def init_db():
    """Issue CREATE statements for the tables registered on ``Base.metadata``.

    Uses the module-level ``engine`` as the target database.
    """
    metadata = Base.metadata
    metadata.create_all(engine)
|
||||
|
|
|
|||
196
Main.py
196
Main.py
|
|
@ -1,98 +1,98 @@
|
|||
from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
|
||||
from datetime import datetime, date
|
||||
from DbSetup import init_db
|
||||
import locale
|
||||
from MastodonAccountManager import MastodonAccountManager
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
from TootCrawler import TootCrawler
|
||||
|
||||
locale.setlocale(locale.LC_TIME, "en_EN.UTF-8")
|
||||
init_db()
|
||||
|
||||
mastodonAccountManager = MastodonAccountManager()
|
||||
mastodonInstance = mastodonAccountManager.instance
|
||||
"""
|
||||
mastodonInstance.log_in(
|
||||
'USER-EMAIL',
|
||||
'PW',
|
||||
to_file = 'hedonodon_usercred.secret'
|
||||
)
|
||||
"""
|
||||
|
||||
tootCrawler = TootCrawler(mastodonInstance)
|
||||
crudManager = CRUDManager()
|
||||
|
||||
lastTootId = crudManager.getLastToot()
|
||||
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
|
||||
|
||||
if not tootsDataframe.empty:
|
||||
crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
|
||||
else:
|
||||
print('Nothing changed since last database insert!')
|
||||
|
||||
sentimentsYesterday = calculateSentimentCount()
|
||||
sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
|
||||
|
||||
if not tootsDataframe.empty:
|
||||
crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
|
||||
crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
|
||||
else:
|
||||
print('Nothing changed since last database insert!')
|
||||
|
||||
colormap = {
|
||||
'negative': '#ff9999',
|
||||
'neutral': '#ffcc99',
|
||||
"positive": '#99ff99'
|
||||
}
|
||||
|
||||
todaysColors = []
|
||||
for sentiment in sentimentsYesterday['sentiment'].to_numpy():
|
||||
todaysColors.append(colormap[sentiment])
|
||||
|
||||
|
||||
|
||||
TodayDate = datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d').strftime('%d.%m.%Y')
|
||||
dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
|
||||
dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1)
|
||||
|
||||
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))
|
||||
|
||||
# Pie chart.
|
||||
pieChartlabels = dataframe4PieChart.index.to_numpy()
|
||||
pieChart = dataframe4PieChart.plot.pie(
|
||||
ax=axes[0],
|
||||
y='sentimentCount',
|
||||
ylabel="",
|
||||
labels=dataframe4PieChart['sentimentCount'],
|
||||
title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org',
|
||||
colors=todaysColors,
|
||||
wedgeprops=dict(linewidth=3, edgecolor='w'),
|
||||
startangle=90
|
||||
)
|
||||
|
||||
axes[0].axis('equal')
|
||||
centre_circle = plt.Circle((0, 0), 0.6, fc='white')
|
||||
axes[0].add_patch(centre_circle)
|
||||
chartBox = axes[0].get_position()
|
||||
axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))
|
||||
|
||||
# Line chart.
|
||||
lineChart = dataframe4LineChart.plot.line(
|
||||
ax=axes[1],
|
||||
title='Mean of all sentiments from max positive (1) to min negative (-1)'
|
||||
)
|
||||
axes[1].grid(True)
|
||||
axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])
|
||||
axes[1].set_ylim([-1, 1])
|
||||
axes[1].xaxis.set_major_locator(mdates.MonthLocator())
|
||||
axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15))
|
||||
axes[1].xaxis.set_major_formatter(plt.NullFormatter())
|
||||
axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
|
||||
axes[1].tick_params(which='minor', length=0)
|
||||
plotFileUrl = f'./plots/{TodayDate}.png'
|
||||
plt.savefig(plotFileUrl)
|
||||
|
||||
media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
|
||||
mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
|
||||
|
||||
from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
|
||||
from datetime import datetime, date
|
||||
from DbSetup import init_db
|
||||
import locale
|
||||
from MastodonAccountManager import MastodonAccountManager
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
from TootCrawler import TootCrawler
|
||||
|
||||
# Daily job: crawl new toots, store them, aggregate yesterday's sentiments,
# render a pie + line chart and post the image to Mastodon.

# LC_TIME drives the month abbreviations rendered on the line chart axis.
locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
init_db()

mastodonAccountManager = MastodonAccountManager()
mastodonInstance = mastodonAccountManager.instance

# One-time login that creates the user credential file; run manually if needed:
#
# mastodonInstance.log_in(
#     'USER-EMAIL',
#     'PW',
#     to_file = 'hedonodon_usercred.secret'
# )

tootCrawler = TootCrawler(mastodonInstance)
crudManager = CRUDManager()

# Only fetch toots newer than the most recent one already stored.
lastTootId = crudManager.getLastToot()
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)

if not tootsDataframe.empty:
    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
else:
    print('Nothing changed since last database insert!')

# Must run after the insert above: the aggregation reads from the Toots table.
sentimentsYesterday = calculateSentimentCount()

# With no rows for yesterday there is no date, no pie chart and nothing worth
# posting; stop cleanly instead of failing on the first positional lookup.
if sentimentsYesterday.empty:
    print('No toots found for yesterday - nothing to plot or post.')
    raise SystemExit(0)

sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)

if not tootsDataframe.empty:
    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
    crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
else:
    print('Nothing changed since last database insert!')

colormap = {
    'negative': '#ff9999',
    'neutral': '#ffcc99',
    "positive": '#99ff99'
}

# Colours in the same order as the rows that become the pie wedges.
todaysColors = [colormap[sentiment] for sentiment in sentimentsYesterday['sentiment'].to_numpy()]

# Date the statistics refer to (the name is historical: it is yesterday's date).
reportDate = datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d')
TodayDate = reportDate.strftime('%d.%m.%Y')
dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1)

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))

# Pie chart.
pieChartlabels = dataframe4PieChart.index.to_numpy()
pieChart = dataframe4PieChart.plot.pie(
    ax=axes[0],
    y='sentimentCount',
    ylabel="",
    labels=dataframe4PieChart['sentimentCount'],
    title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org',
    colors=todaysColors,
    wedgeprops=dict(linewidth=3, edgecolor='w'),
    startangle=90
)

axes[0].axis('equal')
# A white disc over the centre turns the pie into a donut.
centre_circle = plt.Circle((0, 0), 0.6, fc='white')
axes[0].add_patch(centre_circle)
axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))

# Line chart.
lineChart = dataframe4LineChart.plot.line(
    ax=axes[1],
    title='Mean of all sentiments from max positive (1) to min negative (-1)'
)
axes[1].grid(True)
# Show the full calendar year of the reported date rather than a fixed year.
axes[1].set_xlim([date(reportDate.year, 1, 1), date(reportDate.year, 12, 31)])
axes[1].set_ylim([-1, 1])
# Major ticks on month starts carry no label; the month name is drawn by the
# minor tick placed mid-month so it appears centred under its month.
axes[1].xaxis.set_major_locator(mdates.MonthLocator())
axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15))
axes[1].xaxis.set_major_formatter(plt.NullFormatter())
axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
axes[1].tick_params(which='minor', length=0)
plotFileUrl = f'./plots/{TodayDate}.png'
plt.savefig(plotFileUrl)

media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from mastodon import Mastodon
|
||||
|
||||
class MastodonAccountManager():
|
||||
def __init__(self):
|
||||
self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')
|
||||
from mastodon import Mastodon
|
||||
|
||||
class MastodonAccountManager():
    """Holds the Mastodon API client used by the rest of the application."""

    def __init__(self, clientCredFile='hedonodon_clientcred.secret', userCredFile='hedonodon_usercred.secret'):
        """Create the API client from two credential files.

        Args:
            clientCredFile: value passed to ``Mastodon`` as ``client_id``.
                Defaults to the file name the project has always used.
            userCredFile: value passed to ``Mastodon`` as ``access_token``.
                Defaults to the file name the project has always used.

        Both defaults are relative paths, so they resolve against the current
        working directory; pass absolute paths when the job is started from
        elsewhere.
        """
        self.instance = Mastodon(client_id=clientCredFile, access_token=userCredFile)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
# Hedonodon
|
||||
I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
|
||||
|
||||
# Hedonodon
|
||||
I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
|
||||
|
||||
More Documentation coming soon!
|
||||
148
SentiTooter.py
148
SentiTooter.py
|
|
@ -1,74 +1,74 @@
|
|||
from germansentiment import SentimentModel
|
||||
import numpy as np
|
||||
from scipy.special import softmax
|
||||
from transformers import AutoModelForSequenceClassification
|
||||
from transformers import AutoTokenizer
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
|
||||
|
||||
# Preprocess text (username and link placeholders)
|
||||
def preprocess(text):
|
||||
new_text = []
|
||||
|
||||
for t in text.split(" "):
|
||||
t = '@user' if t.startswith('@') and len(t) > 1 else t
|
||||
t = 'http' if t.startswith('http') else t
|
||||
new_text.append(t)
|
||||
return " ".join(new_text)
|
||||
|
||||
|
||||
class SentiTooter:
|
||||
""""""
|
||||
|
||||
def __init__(self):
|
||||
self.deModel = SentimentModel()
|
||||
self.enModelType = f"cardiffnlp/twitter-roberta-base-sentiment"
|
||||
self.enModel, self.enTokenizer = self.initModel()
|
||||
# https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt
|
||||
self.labels = ['negative', 'neutral', 'positive']
|
||||
self.sia = SentimentIntensityAnalyzer()
|
||||
|
||||
def analyze(self, language, content):
|
||||
match language:
|
||||
case 'de':
|
||||
sentiment = self.deModel.predict_sentiment([content])
|
||||
sentiment.append('germanSentiment')
|
||||
return sentiment
|
||||
case 'en':
|
||||
text = preprocess(content)
|
||||
encoded_input = self.enTokenizer(text, return_tensors='pt')
|
||||
output = self.enModel(**encoded_input)
|
||||
scores = output[0][0].detach().numpy()
|
||||
scores = softmax(scores)
|
||||
sentimentIndexWithMaxScore = np.argmax(scores)
|
||||
sentimentLabel = self.labels[sentimentIndexWithMaxScore]
|
||||
sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment']
|
||||
return sentiment
|
||||
case _:
|
||||
compound = self.sia.polarity_scores(content)['compound']
|
||||
if compound > (1 / 3):
|
||||
return ['positive', 'vaderSentiment']
|
||||
elif compound < (-1 / 3):
|
||||
return ['negative', 'vaderSentiment']
|
||||
else:
|
||||
return ['neutral', 'vaderSentiment']
|
||||
|
||||
|
||||
|
||||
def initModel(self):
|
||||
# PT
|
||||
tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
|
||||
tokenizer.save_pretrained(self.enModelType)
|
||||
model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
|
||||
model.save_pretrained(self.enModelType)
|
||||
return model, tokenizer
|
||||
|
||||
# # TF
|
||||
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
|
||||
# model.save_pretrained(MODEL)
|
||||
|
||||
# text = "Good night 😊"
|
||||
# encoded_input = tokenizer(text, return_tensors='tf')
|
||||
# output = model(encoded_input)
|
||||
# scores = output[0][0].numpy()
|
||||
# scores = softmax(scores)
|
||||
from germansentiment import SentimentModel
|
||||
import numpy as np
|
||||
from scipy.special import softmax
|
||||
from transformers import AutoModelForSequenceClassification
|
||||
from transformers import AutoTokenizer
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
|
||||
|
||||
# Preprocess text (username and link placeholders)
|
||||
def preprocess(text):
    """Replace user mentions and links with fixed placeholders.

    The text is split on single spaces. A token starting with ``http`` becomes
    ``http``; a token starting with ``@`` that is longer than one character
    becomes ``@user``; every other token is kept. Tokens are re-joined with
    single spaces, so the original spacing is preserved.
    """
    def _placeholder(token):
        if token.startswith('http'):
            return 'http'
        if len(token) > 1 and token.startswith('@'):
            return '@user'
        return token

    return " ".join(map(_placeholder, text.split(" ")))
|
||||
|
||||
|
||||
class SentiTooter:
    """Label a text as negative, neutral or positive with a per-language model."""

    def __init__(self):
        """Instantiate the German model, the English model and the VADER analyzer."""
        self.deModel = SentimentModel()
        self.enModelType = "cardiffnlp/twitter-roberta-base-sentiment"
        self.enModel, self.enTokenizer = self.initModel()
        # Index -> label mapping of the English model:
        # https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt
        self.labels = ['negative', 'neutral', 'positive']
        self.sia = SentimentIntensityAnalyzer()

    def analyze(self, language, content):
        """Classify ``content`` using the model that belongs to ``language``.

        Args:
            language: language code; ``'de'`` and ``'en'`` select dedicated
                models, anything else falls back to VADER.
            content: the text to classify.

        Returns:
            list: ``[label, modelName]``.
        """
        if language == 'de':
            result = self.deModel.predict_sentiment([content])
            result.append('germanSentiment')
            return result

        if language == 'en':
            encoded = self.enTokenizer(preprocess(content), return_tensors='pt')
            rawScores = self.enModel(**encoded)[0][0].detach().numpy()
            probabilities = softmax(rawScores)
            # The label whose probability is highest wins.
            bestLabel = self.labels[np.argmax(probabilities)]
            return [bestLabel, 'twitter-roberta-base-sentiment']

        # Fallback: split VADER's compound score into three bands at +-1/3.
        compound = self.sia.polarity_scores(content)['compound']
        if compound > (1 / 3):
            label = 'positive'
        elif compound < (-1 / 3):
            label = 'negative'
        else:
            label = 'neutral'
        return [label, 'vaderSentiment']

    def initModel(self):
        """Load the English tokenizer and model and save both under ``enModelType``.

        Returns:
            tuple: ``(model, tokenizer)``.
        """
        # PT
        location = self.enModelType
        tokenizer = AutoTokenizer.from_pretrained(location)
        tokenizer.save_pretrained(location)
        model = AutoModelForSequenceClassification.from_pretrained(location)
        model.save_pretrained(location)
        return model, tokenizer
|
||||
|
||||
# # TF
|
||||
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
|
||||
# model.save_pretrained(MODEL)
|
||||
|
||||
# text = "Good night 😊"
|
||||
# encoded_input = tokenizer(text, return_tensors='tf')
|
||||
# output = model(encoded_input)
|
||||
# scores = output[0][0].numpy()
|
||||
# scores = softmax(scores)
|
||||
|
|
|
|||
62
Tables.py
62
Tables.py
|
|
@ -1,32 +1,32 @@
|
|||
from DbSetup import Base
|
||||
from sqlalchemy import Column, Date, Integer, Float, String
|
||||
|
||||
class Toots(Base):
|
||||
__tablename__ = 'Toots'
|
||||
__table_args__ = {'extend_existing': True}
|
||||
index = Column(Integer, primary_key=True)
|
||||
model = Column(String(30))
|
||||
datetime = Column(Date)
|
||||
language = Column(String(3))
|
||||
sentiment = Column(String(8))
|
||||
tootId = Column(String(255))
|
||||
toot = Column(String(600))
|
||||
userName = Column(String(255))
|
||||
userId = Column(String(255))
|
||||
|
||||
|
||||
|
||||
class SentimentCounts(Base):
|
||||
__tablename__ = 'SentimentCounts'
|
||||
__table_args__ = {'extend_existing': True}
|
||||
index = Column(Integer, primary_key=True)
|
||||
sentimentCount = Column(Integer)
|
||||
date = Column(Date, primary_key=True)
|
||||
sentiment = Column(String(8))
|
||||
|
||||
class SentimentMeans(Base):
|
||||
__tablename__ = 'SentimentMeans'
|
||||
__table_args__ = {'extend_existing': True}
|
||||
index = Column(Integer, primary_key=True)
|
||||
date = Column(Date, primary_key=True)
|
||||
from DbSetup import Base
|
||||
from sqlalchemy import Column, Date, Integer, Float, String
|
||||
|
||||
class Toots(Base):
    """ORM model for the ``Toots`` table: one analysed toot per row."""

    __tablename__ = 'Toots'
    __table_args__ = {'extend_existing': True}

    # Surrogate primary key.
    index = Column(Integer, primary_key=True)
    # Name of the sentiment model that produced the label.
    model = Column(String(30))
    # NOTE(review): declared as Date, but TootCrawler fills this field with
    # full timezone-aware timestamps - confirm whether DateTime is intended.
    datetime = Column(Date)
    # Detected language code of the toot text.
    language = Column(String(3))
    # Sentiment label assigned to the toot.
    sentiment = Column(String(8))
    # Identifier of the toot on the Mastodon instance.
    tootId = Column(String(255))
    # Cleaned toot text.
    toot = Column(String(600))
    # Display name of the author.
    userName = Column(String(255))
    # Account id of the author.
    userId = Column(String(255))
|
||||
|
||||
|
||||
|
||||
class SentimentCounts(Base):
    """ORM model for the ``SentimentCounts`` table: toot count per day and label."""

    __tablename__ = 'SentimentCounts'
    __table_args__ = {'extend_existing': True}

    # ``index`` and ``date`` together form a composite primary key.
    index = Column(Integer, primary_key=True)
    # Number of toots carrying ``sentiment`` on ``date``.
    sentimentCount = Column(Integer)
    date = Column(Date, primary_key=True)
    # Sentiment label the count belongs to.
    sentiment = Column(String(8))
|
||||
|
||||
class SentimentMeans(Base):
    """ORM model for the ``SentimentMeans`` table: one mean score per day."""

    __tablename__ = 'SentimentMeans'
    __table_args__ = {'extend_existing': True}

    # ``index`` and ``date`` together form a composite primary key.
    index = Column(Integer, primary_key=True)
    date = Column(Date, primary_key=True)
    # NOTE(review): calculateSentimentMean emits the key 'sentimentsMean'
    # (lower-case first letter) - confirm the casing difference is harmless
    # on the target database.
    SentimentsMean = Column(Float)
|
||||
|
|
@ -1,48 +1,48 @@
|
|||
from langdetect import detect
|
||||
import pytz
|
||||
import pandas as pd
|
||||
import re
|
||||
from SentiTooter import SentiTooter
|
||||
from pprint import pprint
|
||||
|
||||
class TootCrawler():
|
||||
|
||||
def __init__(self, mastodonInstance) -> None:
|
||||
self.mastodonInstance = mastodonInstance
|
||||
self.compilePattern = re.compile('<.*?>')
|
||||
self.sentiTooter = SentiTooter()
|
||||
self.localTimezone = pytz.timezone('Europe/Berlin')
|
||||
|
||||
def getLocalTimeline(self, minId=None):
|
||||
return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
|
||||
|
||||
def cleanhtml(self, raw_html):
|
||||
cleantext = re.sub(self.compilePattern, '', raw_html)
|
||||
cleantext = re.sub(r'http\S+', '', cleantext)
|
||||
return cleantext
|
||||
|
||||
def buildTootsDataframe(self, minId=None):
|
||||
toots = []
|
||||
allTimelineResults = []
|
||||
timelinePagination = self.getLocalTimeline(minId)
|
||||
|
||||
while timelinePagination:
|
||||
allTimelineResults = allTimelineResults + timelinePagination
|
||||
timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
|
||||
for i in allTimelineResults:
|
||||
content = self.cleanhtml(i.content)
|
||||
language = detect(content)
|
||||
sentiment = self.sentiTooter.analyze(language, content)
|
||||
toot = {
|
||||
"sentiment": sentiment[0],
|
||||
"model": sentiment[1],
|
||||
"toot": content,
|
||||
"datetime": i.created_at.astimezone(self.localTimezone),
|
||||
"language": language,
|
||||
"userName": i.account.display_name,
|
||||
"userId": i.account.id,
|
||||
"tootId": i.id
|
||||
}
|
||||
toots.append(toot)
|
||||
toots.sort(key=lambda item:item.get('datetime'))
|
||||
from langdetect import detect
|
||||
import pytz
|
||||
import pandas as pd
|
||||
import re
|
||||
from SentiTooter import SentiTooter
|
||||
from pprint import pprint
|
||||
|
||||
class TootCrawler():
    """Fetch toots from the local timeline and turn them into analysed records."""

    def __init__(self, mastodonInstance) -> None:
        """Store the API client and prepare the helpers used per toot.

        Args:
            mastodonInstance: Mastodon API client used for all requests.
        """
        self.mastodonInstance = mastodonInstance
        # Non-greedy match of anything between angle brackets (HTML tags).
        self.compilePattern = re.compile('<.*?>')
        self.sentiTooter = SentiTooter()
        self.localTimezone = pytz.timezone('Europe/Berlin')

    def getLocalTimeline(self, minId=None):
        """Request one page of the local timeline.

        Args:
            minId: passed to the API as ``min_id``; ``None`` means no bound.

        Returns:
            The page returned by ``timeline_local``.
        """
        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)

    def cleanhtml(self, raw_html):
        """Reduce a toot's HTML content to plain text without links.

        Tags are removed, HTML entities are decoded (``&amp;`` -> ``&``,
        ``&#39;`` -> ``'``) so the models see the text as written, and every
        token starting with ``http`` is dropped.

        Args:
            raw_html: HTML string of the toot.

        Returns:
            str: the cleaned text.
        """
        import html

        cleantext = re.sub(self.compilePattern, '', raw_html)
        # Decode only after the tags are gone so that an escaped "<" in the
        # text is never mistaken for the start of a tag.
        cleantext = html.unescape(cleantext)
        cleantext = re.sub(r'http\S+', '', cleantext)
        return cleantext

    def _detectLanguage(self, content):
        """Return the detected language code, or ``'und'`` when detection fails."""
        from langdetect.lang_detect_exception import LangDetectException

        try:
            return detect(content)
        except LangDetectException:
            # Raised for text without usable features, e.g. a toot that is
            # empty once tags and links are removed. 'und' (undetermined)
            # routes the toot to the analyzer's fallback branch instead of
            # aborting the whole crawl.
            return 'und'

    def buildTootsDataframe(self, minId=None):
        """Collect all available timeline pages and analyse every toot.

        Args:
            minId: id passed to the first timeline request as lower bound.

        Returns:
            pandas.DataFrame: one row per toot, sorted by ``datetime``, with
            the columns ``sentiment``, ``model``, ``toot``, ``datetime``,
            ``language``, ``userName``, ``userId`` and ``tootId``; empty when
            no toot was fetched.
        """
        allTimelineResults = []
        timelinePagination = self.getLocalTimeline(minId)

        # Keep following the "previous" link until the API returns nothing.
        while timelinePagination:
            allTimelineResults.extend(timelinePagination)
            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)

        toots = []
        for status in allTimelineResults:
            content = self.cleanhtml(status.content)
            language = self._detectLanguage(content)
            sentiment = self.sentiTooter.analyze(language, content)
            toots.append({
                "sentiment": sentiment[0],
                "model": sentiment[1],
                "toot": content,
                "datetime": status.created_at.astimezone(self.localTimezone),
                "language": language,
                "userName": status.account.display_name,
                "userId": status.account.id,
                "tootId": status.id
            })

        toots.sort(key=lambda item: item.get('datetime'))
        return pd.DataFrame.from_records(toots)
|
||||
|
|
@ -1,8 +1,12 @@
|
|||
mastodon.py
|
||||
matplotlib
|
||||
pandas
|
||||
sqlalchemy
|
||||
vader-multi
|
||||
numpy
|
||||
pytz
|
||||
transformers
|
||||
mastodon.py
|
||||
matplotlib
|
||||
pandas
|
||||
sqlalchemy
|
||||
vader-multi
|
||||
langdetect
|
||||
numpy
|
||||
pytz
|
||||
transformers
|
||||
wheel
|
||||
germansentiment
|
||||
scipy
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue