made hedonodon server ready
This commit is contained in:
parent
52223192b4
commit
2b98565444
10 changed files with 342 additions and 336 deletions
|
|
@ -1,48 +1,48 @@
|
|||
from langdetect import detect
|
||||
import pytz
|
||||
import pandas as pd
|
||||
import re
|
||||
from SentiTooter import SentiTooter
|
||||
from pprint import pprint
|
||||
|
||||
class TootCrawler():
    """Collect toots from a Mastodon local timeline and score their sentiment.

    The crawler strips markup and links from each toot, detects its language
    with ``langdetect`` and hands the cleaned text to ``SentiTooter`` for
    sentiment analysis.
    """

    # Column layout of the frame returned by buildTootsDataframe; mirrors the
    # key order of the per-toot record so an empty crawl has the same schema.
    _TOOT_COLUMNS = (
        "sentiment",
        "model",
        "toot",
        "datetime",
        "language",
        "userName",
        "userId",
        "tootId",
    )

    # Compiled once for the whole class instead of on every cleanhtml() call.
    _URL_PATTERN = re.compile(r'http\S+')

    def __init__(self, mastodonInstance) -> None:
        """Store the API client and set up the helpers used while crawling.

        Args:
            mastodonInstance: client object exposing ``timeline_local`` and
                ``fetch_previous`` (presumably a ``mastodon.Mastodon``
                instance -- confirm against the caller).
        """
        self.mastodonInstance = mastodonInstance
        # Non-greedy match so each tag is removed separately.
        self.compilePattern = re.compile('<.*?>')
        self.sentiTooter = SentiTooter()
        self.localTimezone = pytz.timezone('Europe/Berlin')

    def getLocalTimeline(self, minId=None):
        """Return the first page of the local timeline.

        Args:
            minId: forwarded as ``min_id`` to the client; ``None`` applies no
                lower bound.
        """
        # NOTE(review): Mastodon servers commonly cap ``limit`` far below 500;
        # the value is kept as-is and paging covers the rest -- confirm.
        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)

    def cleanhtml(self, raw_html):
        """Return ``raw_html`` with tags and ``http...`` links removed.

        Empty or ``None`` input yields an empty string instead of raising.
        """
        if not raw_html:
            return ''
        cleantext = self.compilePattern.sub('', raw_html)
        return self._URL_PATTERN.sub('', cleantext)

    def _buildTootRecord(self, status):
        """Turn one timeline status into a record dict, or ``None`` if its
        language cannot be detected."""
        # Function-scope import: langdetect is already a dependency of this
        # module, only the exception class is additionally needed here.
        from langdetect.lang_detect_exception import LangDetectException

        content = self.cleanhtml(status.content)
        try:
            language = detect(content)
        except LangDetectException:
            # Raised for text without usable features (e.g. media-only or
            # link-only toots); skip the toot rather than abort the crawl.
            return None

        # analyze() result is indexed: [0] is stored as the sentiment and
        # [1] as the model that produced it.
        sentiment = self.sentiTooter.analyze(language, content)
        return {
            "sentiment": sentiment[0],
            "model": sentiment[1],
            "toot": content,
            "datetime": status.created_at.astimezone(self.localTimezone),
            "language": language,
            "userName": status.account.display_name,
            "userId": status.account.id,
            "tootId": status.id,
        }

    def buildTootsDataframe(self, minId=None):
        """Crawl the local timeline and return the analysed toots.

        Args:
            minId: lower bound passed to getLocalTimeline().

        Returns:
            pandas.DataFrame with one row per analysable toot, ordered by
            ``datetime`` ascending. When nothing could be collected the frame
            is empty but still carries the columns in ``_TOOT_COLUMNS``.
        """
        allTimelineResults = []
        timelinePagination = self.getLocalTimeline(minId)

        # Follow the client's "previous page" links until it reports no more.
        while timelinePagination:
            # extend() avoids re-copying the accumulated list for every page.
            allTimelineResults.extend(timelinePagination)
            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)

        toots = []
        for status in allTimelineResults:
            toot = self._buildTootRecord(status)
            if toot is not None:
                toots.append(toot)

        if not toots:
            return pd.DataFrame(columns=list(self._TOOT_COLUMNS))

        toots.sort(key=lambda item: item["datetime"])
        return pd.DataFrame.from_records(toots)
|
||||
from langdetect import detect
|
||||
import pytz
|
||||
import pandas as pd
|
||||
import re
|
||||
from SentiTooter import SentiTooter
|
||||
from pprint import pprint
|
||||
|
||||
class TootCrawler():
    """Collect toots from a Mastodon local timeline and score their sentiment.

    The crawler strips markup and links from each toot, detects its language
    with ``langdetect`` and hands the cleaned text to ``SentiTooter`` for
    sentiment analysis.
    """

    # Column layout of the frame returned by buildTootsDataframe; mirrors the
    # key order of the per-toot record so an empty crawl has the same schema.
    _TOOT_COLUMNS = (
        "sentiment",
        "model",
        "toot",
        "datetime",
        "language",
        "userName",
        "userId",
        "tootId",
    )

    # Compiled once for the whole class instead of on every cleanhtml() call.
    _URL_PATTERN = re.compile(r'http\S+')

    def __init__(self, mastodonInstance) -> None:
        """Store the API client and set up the helpers used while crawling.

        Args:
            mastodonInstance: client object exposing ``timeline_local`` and
                ``fetch_previous`` (presumably a ``mastodon.Mastodon``
                instance -- confirm against the caller).
        """
        self.mastodonInstance = mastodonInstance
        # Non-greedy match so each tag is removed separately.
        self.compilePattern = re.compile('<.*?>')
        self.sentiTooter = SentiTooter()
        self.localTimezone = pytz.timezone('Europe/Berlin')

    def getLocalTimeline(self, minId=None):
        """Return the first page of the local timeline.

        Args:
            minId: forwarded as ``min_id`` to the client; ``None`` applies no
                lower bound.
        """
        # NOTE(review): Mastodon servers commonly cap ``limit`` far below 500;
        # the value is kept as-is and paging covers the rest -- confirm.
        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)

    def cleanhtml(self, raw_html):
        """Return ``raw_html`` with tags and ``http...`` links removed.

        Empty or ``None`` input yields an empty string instead of raising.
        """
        if not raw_html:
            return ''
        cleantext = self.compilePattern.sub('', raw_html)
        return self._URL_PATTERN.sub('', cleantext)

    def _buildTootRecord(self, status):
        """Turn one timeline status into a record dict, or ``None`` if its
        language cannot be detected."""
        # Function-scope import: langdetect is already a dependency of this
        # module, only the exception class is additionally needed here.
        from langdetect.lang_detect_exception import LangDetectException

        content = self.cleanhtml(status.content)
        try:
            language = detect(content)
        except LangDetectException:
            # Raised for text without usable features (e.g. media-only or
            # link-only toots); skip the toot rather than abort the crawl.
            return None

        # analyze() result is indexed: [0] is stored as the sentiment and
        # [1] as the model that produced it.
        sentiment = self.sentiTooter.analyze(language, content)
        return {
            "sentiment": sentiment[0],
            "model": sentiment[1],
            "toot": content,
            "datetime": status.created_at.astimezone(self.localTimezone),
            "language": language,
            "userName": status.account.display_name,
            "userId": status.account.id,
            "tootId": status.id,
        }

    def buildTootsDataframe(self, minId=None):
        """Crawl the local timeline and return the analysed toots.

        Args:
            minId: lower bound passed to getLocalTimeline().

        Returns:
            pandas.DataFrame with one row per analysable toot, ordered by
            ``datetime`` ascending. When nothing could be collected the frame
            is empty but still carries the columns in ``_TOOT_COLUMNS``.
        """
        allTimelineResults = []
        timelinePagination = self.getLocalTimeline(minId)

        # Follow the client's "previous page" links until it reports no more.
        while timelinePagination:
            # extend() avoids re-copying the accumulated list for every page.
            allTimelineResults.extend(timelinePagination)
            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)

        toots = []
        for status in allTimelineResults:
            toot = self._buildTootRecord(status)
            if toot is not None:
                toots.append(toot)

        if not toots:
            return pd.DataFrame(columns=list(self._TOOT_COLUMNS))

        toots.sort(key=lambda item: item["datetime"])
        return pd.DataFrame.from_records(toots)
|
||||
Loading…
Add table
Add a link
Reference in a new issue