made hedonodon server ready

2023-01-27 21:08:25 +01:00 · 2023-01-27 21:08:25 +01:00 · 2b98565444
commit 2b98565444
parent 52223192b4
10 changed files with 342 additions and 336 deletions
--- a/TootCrawler.py
+++ b/TootCrawler.py
@@ -1,48 +1,48 @@
-from langdetect import detect
-import pytz
-import pandas as pd
-import re
-from SentiTooter import SentiTooter
-from pprint import pprint
-
-class TootCrawler():
-
-    def __init__(self, mastodonInstance) -> None:
-        self.mastodonInstance = mastodonInstance
-        self.compilePattern = re.compile('<.*?>')
-        self.sentiTooter = SentiTooter()
-        self.localTimezone = pytz.timezone('Europe/Berlin')
-
-    def getLocalTimeline(self, minId=None):
-        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
-
-    def cleanhtml(self, raw_html):
-        cleantext = re.sub(self.compilePattern, '', raw_html)
-        cleantext = re.sub(r'http\S+', '', cleantext)
-        return cleantext
-
-    def buildTootsDataframe(self, minId=None):
-        toots = []
-        allTimelineResults = []
-        timelinePagination = self.getLocalTimeline(minId)
-
-        while timelinePagination:
-            allTimelineResults = allTimelineResults + timelinePagination
-            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
-        for i in allTimelineResults:
-            content = self.cleanhtml(i.content)
-            language = detect(content)
-            sentiment = self.sentiTooter.analyze(language, content)
-            toot = {
-                "sentiment": sentiment[0],
-                "model": sentiment[1],
-                "toot": content,
-                "datetime": i.created_at.astimezone(self.localTimezone),
-                "language": language,
-                "userName": i.account.display_name,
-                "userId": i.account.id,
-                "tootId": i.id
-            }
-            toots.append(toot)
-        toots.sort(key=lambda item:item.get('datetime'))
+from langdetect import detect
+import pytz
+import pandas as pd
+import re
+from SentiTooter import SentiTooter
+from pprint import pprint
+
+class TootCrawler():
+
+    def __init__(self, mastodonInstance) -> None:
+        self.mastodonInstance = mastodonInstance
+        self.compilePattern = re.compile('<.*?>')
+        self.sentiTooter = SentiTooter()
+        self.localTimezone = pytz.timezone('Europe/Berlin')
+
+    def getLocalTimeline(self, minId=None):
+        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
+
+    def cleanhtml(self, raw_html):
+        cleantext = re.sub(self.compilePattern, '', raw_html)
+        cleantext = re.sub(r'http\S+', '', cleantext)
+        return cleantext
+
+    def buildTootsDataframe(self, minId=None):
+        toots = []
+        allTimelineResults = []
+        timelinePagination = self.getLocalTimeline(minId)
+
+        while timelinePagination:
+            allTimelineResults = allTimelineResults + timelinePagination
+            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
+        for i in allTimelineResults:
+            content = self.cleanhtml(i.content)
+            language = detect(content)
+            sentiment = self.sentiTooter.analyze(language, content)
+            toot = {
+                "sentiment": sentiment[0],
+                "model": sentiment[1],
+                "toot": content,
+                "datetime": i.created_at.astimezone(self.localTimezone),
+                "language": language,
+                "userName": i.account.display_name,
+                "userId": i.account.id,
+                "tootId": i.id
+            }
+            toots.append(toot)
+        toots.sort(key=lambda item:item.get('datetime'))
        return pd.DataFrame.from_records(toots)