Now using a language-dependent sentiment analyzer. No compound score anymore.

This commit is contained in:
rnsrk 2023-01-05 01:43:11 +01:00
parent f0d4eadf28
commit a20f7331bb
8 changed files with 153 additions and 72 deletions

View file

@ -1,10 +1,10 @@
from langdetect import detect
import pytz
import pandas as pd
import re
from SentiTooter import SentiTooter
from pprint import pprint
class TootCrawler():
def __init__(self, mastodonInstance) -> None:
@ -13,29 +13,34 @@ class TootCrawler():
self.sentiTooter = SentiTooter()
self.localTimezone = pytz.timezone('Europe/Berlin')
def getLocalTimeline(self, sinceId=None):
return self.mastodonInstance.timeline_local(since_id=sinceId)
def getLocalTimeline(self, minId=None):
    """Request the local timeline from ``self.mastodonInstance``.

    Forwards ``minId`` as the ``min_id`` keyword together with a fixed
    ``limit`` of 500 to ``timeline_local`` and returns whatever that
    call returns.
    """
    # NOTE(review): the server may cap ``limit`` below 500 — confirm
    # against the instance's API limits.
    timelinePage = self.mastodonInstance.timeline_local(
        min_id=minId,
        limit=500,
    )
    return timelinePage
def cleanhtml(self, raw_html):
    """Return ``raw_html`` with pattern matches and links removed.

    Two substitutions run in order: every match of
    ``self.compilePattern`` is deleted, then every run of non-whitespace
    characters that starts with ``http`` is deleted. Surrounding
    whitespace is left in place.
    """
    withoutMarkup = re.sub(self.compilePattern, '', raw_html)
    # Links are removed second, so a URL directly followed by a matched
    # span has already been separated from it.
    return re.sub(r'http\S+', '', withoutMarkup)
def buildTootsDataframe(self, sinceId=None):
def buildTootsDataframe(self, minId=None):
toots = []
allTimelineResults = []
timelinePagination = self.getLocalTimeline(minId)
for i in self.getLocalTimeline(sinceId):
while timelinePagination:
allTimelineResults = allTimelineResults + timelinePagination
timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
for i in allTimelineResults:
content = self.cleanhtml(i.content)
sentiment = self.sentiTooter.analyze(i)
toots.append(
{
"sentiment": sentiment[0],
"compound": sentiment[1],
"model": sentiment[1],
"userName": i.account.display_name,
"userId": i.account.id,
"toot": content,
"datetime": i.created_at.astimezone(self.localTimezone),
"language": i.language,
"language": detect(content),
"tootId": i.id
}
)