Skip to content

Commit 0611c8b

Browse files
committed
feat: add gunicorn
1 parent b8745c2 commit 0611c8b

File tree

4 files changed

+212
-150
lines changed

4 files changed

+212
-150
lines changed

app.py

Lines changed: 138 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,145 @@
1-
import subprocess
1+
import os
2+
import time
3+
import uuid
4+
from concurrent.futures import ThreadPoolExecutor
5+
from concurrent.futures import as_completed
26

7+
from flask import Flask, request, jsonify, make_response, Response
8+
from flask_cors import CORS
9+
from werkzeug.utils import secure_filename
10+
from file_utils import ppt_preview
11+
from upload_s3 import upload_file_to_s3
312

4-
def main():
5-
server = subprocess.Popen(["python3", "index_server.py"])
6-
flask_demo = subprocess.Popen(["python3", "flask_demo.py"])
13+
from index_server import DocumentManager
714

15+
# Flask application object that every route in this module is registered on.
app = Flask(__name__)

# NOTE(review): `response_buffering` does not look like a documented Flask
# setting -- confirm that something actually reads this attribute.
app.response_buffering = False

# Apply flask_cors to the whole application, using its default options.
CORS(app)

# NOTE(review): debug mode is hard-coded on here; confirm that is intended
# for anything other than local development.
app.config.update(DEBUG=True)

# One DocumentManager instance shared by all request handlers. No index is
# initialized at import time; the /stream route calls
# initialize_index(uuid) on each request instead.
document_manager = DocumentManager()
23+
24+
25+
@app.route("/stream")
def stream():
    """Stream the answer to a question as a ``text/event-stream`` response.

    Query parameters:
        text: the question to ask (required).
        uuid: identifier handed to ``document_manager.initialize_index`` and
            ``document_manager.query_stream`` (required).

    Returns:
        A ``(message, 400)`` tuple when ``text`` or ``uuid`` is missing,
        otherwise a ``Response`` wrapping whatever ``query_stream`` returns.
    """
    query_text = request.args.get("text", None)
    uuid_id = request.args.get("uuid", None)

    if query_text is None:
        return "No text found, please include a ?text=blah parameter in the URL", 400

    if uuid_id is None:
        # Report the parameter that is actually missing (this branch used to
        # repeat the "No text found" message); wording matches /query.
        return "No UUID found, please include a uuid in the URL", 400

    document_manager.initialize_index(uuid_id)
    answer_stream = document_manager.query_stream(query_text, uuid_id)

    return Response(answer_stream, mimetype="text/event-stream")
39+
40+
41+
# TODO: Can we delete this route?
42+
@app.route("/query", methods=["GET"])
def query_index():
    """Answer a question with a single JSON response.

    Query parameters:
        text: the question to ask (required).
        doc_id: optional document id, forwarded to
            ``document_manager.query_index``.
        uuid: required to be present, but not otherwise used by this handler.

    Returns:
        A ``(message, 400)`` tuple when ``text`` or ``uuid`` is missing,
        otherwise ``{"text": <stringified answer>}`` with status 200.
    """
    params = request.args
    question = params.get("text", None)
    doc_id = params.get("doc_id", None)

    # Guard clauses: reject the request before touching the index.
    if question is None:
        return "No text found, please include a ?text=blah parameter in the URL", 400
    if params.get("uuid", None) is None:
        return "No UUID found, please include a uuid in the URL", 400

    answer = document_manager.query_index(question, doc_id)._getvalue()
    payload = {"text": str(answer)}
    return make_response(jsonify(payload)), 200
57+
58+
59+
def _discard_file(path):
    """Best-effort removal of a temporary file.

    A missing file (or ``None``) is ignored; any other OS error is printed
    rather than raised so that cleanup never masks the real response.
    """
    if path is None:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as exc:
        print(f"Could not remove {path}: {exc}")


@app.route("/uploadFile", methods=["POST"])
def upload_file():
    """Accept an uploaded presentation, index it, and publish it with previews.

    Steps, in order:
        1. Save the multipart ``file`` part under ``documents/``.
        2. Insert it into the index via ``document_manager.insert_into_index``.
           The doc id is the sanitized filename when the form field
           ``filename_as_doc_id`` is present, otherwise a fresh UUID.
        3. Upload the file to the ``slidespeak-files`` bucket as
           ``<uuid><original extension>``.
        4. Generate preview images with ``ppt_preview`` and upload each one
           under ``preview-images/``.

    Returns:
        ``(message, 400)`` when no usable file was sent;
        ``("Error: ...", 500)`` when saving or indexing fails;
        otherwise JSON ``{"uuid": ..., "previewUrls": [...]}`` with status 200.
        Preview URLs keep the order returned by ``ppt_preview``; previews whose
        upload failed are skipped.

    Exceptions raised by the main S3 upload or by preview generation still
    propagate to Flask, but the local copy of the file is now removed first.
    """
    if "file" not in request.files:
        return "Please send a POST request with a file", 400

    uploaded_file = request.files["file"]
    filename = secure_filename(uploaded_file.filename or "")
    if not filename:
        # secure_filename() may return an empty string; joining that would
        # produce the bare "documents" directory path instead of a file path.
        return "Please send a POST request with a file", 400

    generated_uuid = str(uuid.uuid4())
    # NOTE(review): the local path is derived from the client filename only,
    # so two simultaneous uploads with the same name would share this path.
    filepath = os.path.join("documents", os.path.basename(filename))

    try:
        start_time = time.time()
        uploaded_file.save(filepath)
        print("Saving the local PPT file: {:.2f}s".format(time.time() - start_time))

        start_time = time.time()
        if request.form.get("filename_as_doc_id", None) is not None:
            document_manager.insert_into_index(filepath, doc_id=filename)
        else:
            document_manager.insert_into_index(filepath, generated_uuid)
        print("Inserted into llama index: {:.2f}s".format(time.time() - start_time))
    except Exception as e:
        print(e)
        # Clean up the temp file before reporting the failure.
        _discard_file(filepath)
        return "Error: {}".format(str(e)), 500

    try:
        start_time = time.time()
        upload_file_to_s3(
            filepath,
            "slidespeak-files",
            generated_uuid + os.path.splitext(filepath)[1],
        )
        print("Upload PPT to S3: {:.2f}s".format(time.time() - start_time))

        start_time = time.time()
        preview_file_paths = ppt_preview(
            filepath, "preview_images/" + generated_uuid + ".jpg"
        )
        print("Generating PPT preview: {:.2f}s".format(time.time() - start_time))
    finally:
        # The local copy is no longer needed whether or not the steps above
        # succeeded; previously it was left behind when one of them raised.
        _discard_file(filepath)

    preview_urls = []
    # Iterating in order and appending gives the same ordering the old
    # index-keyed dict produced, without the O(n^2) list.index() lookups.
    for preview_file_path in preview_file_paths:
        try:
            preview_urls.append(
                upload_file_to_s3(
                    preview_file_path,
                    "slidespeak-files",
                    "preview-images/" + os.path.basename(preview_file_path),
                )
            )
        except Exception as exc:
            # Best effort: one failed preview must not fail the whole upload.
            print(f"{preview_file_path} generated an exception: {exc}")
        finally:
            _discard_file(preview_file_path)

    return (
        make_response(jsonify({"uuid": generated_uuid, "previewUrls": preview_urls})),
        200,
    )
137+
138+
139+
@app.route("/")
def home():
    """Root route: return a fixed plain-text greeting."""
    greeting = "Hello, World! Welcome to the llama_index docker image!"
    return greeting
16142

17143

18144
if __name__ == "__main__":
19-
main()
145+
app.run(host="0.0.0.0", port=8000)

file_utils.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
import subprocess
44
from pdf2image import convert_from_path
55

6-
76
def search_and_extract(zip_filepath, target_files, extract_to):
87
# Ensure the target directory exists
98
if not os.path.exists(extract_to):
@@ -31,10 +30,14 @@ def ppt_preview(ppt_file_path, preview_file_path):
3130

3231
# Generate a temporary pdf path
3332
pdf_file_path = os.path.splitext(ppt_file_path)[0] + ".pdf"
34-
print(pdf_file_path)
3533

3634
# Convert PowerPoint to PDF using unoconv
3735
subprocess.run(["unoconv", "-f", "pdf", "-o", pdf_file_path, ppt_file_path])
36+
if os.path.exists(pdf_file_path):
37+
print(f"{pdf_file_path} exists!")
38+
else:
39+
print(f"{pdf_file_path} does not exist.")
40+
print(pdf_file_path)
3841

3942
# Convert PDF to list of images
4043
images = convert_from_path(pdf_file_path)
@@ -44,7 +47,5 @@ def ppt_preview(ppt_file_path, preview_file_path):
4447
fname = os.path.splitext(preview_file_path)[0] + f"-{i}.jpg"
4548
image.save(fname, "JPEG")
4649
preview_file_paths.append(fname)
47-
# Save the first image (the first slide of the ppt) to the preview_file_path
48-
# images[0].save(preview_file_path, "JPEG")
4950

5051
return preview_file_paths

0 commit comments

Comments
 (0)