-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserver.py
77 lines (60 loc) · 2.92 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import requests
import json
from typing import Union
from fastapi import FastAPI
# FastAPI application instance; the @app.get decorators in this module
# register their route handlers on it.
app = FastAPI()
@app.get("/")
def read_root():
    """Serve the root path with a fixed greeting payload."""
    greeting = {"Hello": "World"}
    return greeting
@app.get("/items/{item_id}")
def read_item(item_id: int, arg_1: int, arg_2: int):
    """Dispatch an item request to the operation selected by ``item_id``.

    Only ``item_id == 1`` is implemented: it scrapes one page of a forum
    thread, with ``arg_1`` as the thread id and ``arg_2`` as the page number.

    Args:
        item_id: Selector for the operation to run.
        arg_1: First operation argument (thread id when ``item_id == 1``).
        arg_2: Second operation argument (page number when ``item_id == 1``).

    Returns:
        For ``item_id == 1``, the list of post records produced by
        ``get_thread_data``; for any other ``item_id``, the body
        ``{"error": 418}``.
    """
    if item_id == 1:
        thread_id = arg_1
        page = arg_2
        # The handler's return value is JSON-encoded by FastAPI, so the list
        # of post records is returned as-is instead of being dumped by hand.
        return get_thread_data(thread_id, page)
    return {"error": 418}
# Each post record built by get_thread_data is a list in this order:
# [post_id, post_time, post_body, post_author, post_author_userid,
#  post_author_member_type]
testthread_id = 46708  # thread id used by the module-level test fetch
# Profile picture (avatar) URL format:
# https://forumbucket.us-southeast-1.linodeobjects.com/data/avatars/m/21/USERID.jpg
def get_thread_data(thread_id, page, *, timeout=10):
    """Fetch one page of a 68kmla.org forum thread and scrape its posts.

    The page HTML is cut into per-post chunks on the post ``<article>``
    marker, and each field is extracted by plain string splitting on the
    attribute or tag that precedes it.

    Args:
        thread_id: Thread id inserted into the thread URL.
        page: Page number inserted into the thread URL.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection blocks the caller
            indefinitely.

    Returns:
        A list with one entry per post, each entry being the list
        ``[post_id, post_time, post_body, post_author, post_author_userid,
        post_author_member_type]``. The body has been passed through
        ``cleanup_quotes`` and then ``cleanup_links``. The list is empty when
        the page contains no post markers.

    Raises:
        IndexError: If a post chunk lacks one of the expected markers.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    url = ('https://68kmla.org/bb/index.php?threads/foo.'
           + str(thread_id) + '/page-' + str(page))
    response = requests.get(url, timeout=timeout)

    # Everything before the first post marker is page chrome, not a post.
    posts = response.text.split('<article class="message message--post')[1:]

    out_data = []
    for post in posts:
        post_id = post.split('data-content="post-')[1].split('"')[0]
        author = post.split('data-author="')[1].split('"')[0]
        # The user id is whatever follows the last '.' in the text that
        # precedes the avatar link's closing '/" class="avatar avatar'.
        author_userid = post.split('/" class="avatar avatar')[0].split('.')[-1]
        member_type = post.split('itemprop="jobTitle">')[1].split('</h5>')[0]
        post_time = post.split(' data-time="')[1].split('"')[0]
        # NOTE(review): the end marker's inner whitespace must match the
        # site's markup exactly (it may be a non-breaking space) - confirm.
        body = post.split('<div class="bbWrapper">')[1].split('<div class="js-selectToQuoteEnd"> </div>')[0]
        body = cleanup_links(cleanup_quotes(body))
        out_data.append(
            [post_id, post_time, body, author, author_userid, member_type]
        )
    return out_data
def cleanup_quotes(post):
    """Rewrite forum ``<blockquote>`` markup in a post body as compact tags.

    The last 14 characters of ``post`` are always dropped first (presumably
    trailing wrapper markup - confirm against the scraped HTML). Each quote
    block is then replaced by
    ``<quote><title>TITLE</title><body>BODY</body></quote>`` where TITLE is
    the text of the link pointing at the quoted post and BODY is the content
    of the quote's expand-content ``<div>`` with its first 4 and last 3
    characters removed (presumably surrounding whitespace - confirm).

    Text before the first quote, between quotes, and after the last quote is
    kept in place.

    Args:
        post: Raw HTML of one post body.

    Returns:
        The rewritten body as a string.

    Raises:
        IndexError: If a ``<blockquote`` chunk lacks the ``data-source``
            attribute, the ``#post-<id>">`` link, or the expand-content div.
    """
    fixed = post[:-14]
    if '<blockquote' not in fixed:
        return fixed

    # The first piece is the text preceding the first quote; every later
    # piece starts right after a '<blockquote' opener.
    leading, *segments = fixed.split('<blockquote')
    pieces = [leading]
    for segment in segments:
        source = segment.split('data-source="post: ')[1].split('"')[0]
        title = segment.split('#post-' + source + '">')[1].split('</a>')[0]
        content = segment.split('<div class="bbCodeBlock-expandContent js-expandContent ">')[1].split('</div>')[0]
        content = content[4:-3]
        # Whatever follows this quote's closing tag belongs to the post
        # itself; taking it from the segment (not from a split of the whole
        # post) keeps the next quote's raw markup out of the output.
        trailing = segment.partition('</blockquote>')[2]
        pieces.append(
            '<quote><title>' + title + '</title><body>' + content
            + '</body></quote>' + trailing
        )
    return ''.join(pieces)
def cleanup_links(body):
    """Replace every ``<a href=...>`` link in ``body`` with a text placeholder.

    The body is split on ``'<a href='``. The first chunk is the text before
    any link; each later chunk starts at a link, so exactly one placeholder
    is emitted at that position, followed by the chunk's text after its last
    ``</a>``. Text without links is returned without any placeholder.

    Chunks containing ``'<img src'`` contribute no text of their own (the
    placeholder for the link that opened them is still emitted).

    Args:
        body: Post body HTML.

    Returns:
        The body with links replaced by ``' there was an image/link here '``.
    """
    placeholder = ' there was an image/link here '
    pieces = []
    for index, chunk in enumerate(body.split('<a href=')):
        if index:
            # Every chunk after the first began where a link was removed.
            pieces.append(placeholder)
        if '<img src' not in chunk:
            pieces.append(chunk.split('</a>')[-1])
    return ''.join(pieces)
# Ad-hoc check: fetches page 1 of the test thread and prints the scraped posts.
# This sits at module level, so it runs (including the HTTP request) every
# time the module is imported or executed.
# NOTE(review): consider moving this under `if __name__ == "__main__":`.
test_data = get_thread_data(testthread_id, 1)
print(test_data)