SlideSpeak
diff --git a/app.py b/app.py
Lines changed: 138 additions & 12 deletions
diff --git a/file_utils.py b/file_utils.py
Lines changed: 5 additions & 4 deletions
@@ -1,19 +1,145 @@
-import subprocess
+import os
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import as_completed
 
+from flask import Flask, request, jsonify, make_response, Response
+from flask_cors import CORS
+from werkzeug.utils import secure_filename
+from file_utils import ppt_preview
+from upload_s3 import upload_file_to_s3
 
-def main():
-    server = subprocess.Popen(["python3", "index_server.py"])
-    flask_demo = subprocess.Popen(["python3", "flask_demo.py"])
+from index_server import DocumentManager
 
+# Flask application object that the route decorators below attach to.
+app = Flask(__name__)
+# NOTE(review): response_buffering does not appear to be a documented Flask
+# attribute — confirm this assignment has any effect.
+app.response_buffering = False
+# Apply flask_cors to the app with its default settings.
+CORS(app)
+
+# NOTE(review): debug mode is hard-coded on while the server binds to 0.0.0.0
+# at the bottom of the file — confirm this never runs in production.
+app.config["DEBUG"] = True
+
+# Single module-level manager shared by every request handler.
+document_manager = DocumentManager()
+# document_manager.initialize_index()
+
+
+@app.route("/stream")
+def stream():
+    """Stream the answer to a query against one uploaded document.
+
+    Reads the ``text`` and ``uuid`` query parameters, asks the document
+    manager to initialise the index for that uuid, and returns whatever
+    ``document_manager.query_stream`` yields as a ``text/event-stream``
+    response.
+
+    Returns:
+        A 400 response with a plain-text message when ``text`` or ``uuid``
+        is missing, otherwise the streaming ``Response``.
+    """
+    query_text = request.args.get("text")
+    uuid_id = request.args.get("uuid")
+    if query_text is None:
+        return "No text found, please include a ?text=blah parameter in the URL", 400
+
+    # A missing uuid gets its own message (the same one /query uses) so the
+    # caller can tell which parameter to add.
+    if uuid_id is None:
+        return "No UUID found, please include a uuid in the URL", 400
+
+    document_manager.initialize_index(uuid_id)
+    answer_stream = document_manager.query_stream(query_text, uuid_id)
+
+    return Response(answer_stream, mimetype="text/event-stream")
+
+
+# TODO: Can we delete this route?
+@app.route("/query", methods=["GET"])
+def query_index():
+    """Answer a query and return it as JSON under the "text" key.
+
+    Requires the ``text`` and ``uuid`` query parameters and responds 400 with
+    a plain-text message when either one is missing.
+    """
+    query_text = request.args.get("text", None)
+    query_doc_id = request.args.get("doc_id", None)
+    uuid_id = request.args.get("uuid", None)
+    if query_text is None:
+        return "No text found, please include a ?text=blah parameter in the URL", 400
+    if uuid_id is None:
+        return "No UUID found, please include a uuid in the URL", 400
+
+    # NOTE(review): uuid_id is validated above but never passed on; the lookup
+    # uses the optional doc_id parameter instead — confirm this is intended.
+    # NOTE(review): _getvalue() looks like a multiprocessing manager proxy
+    # call; verify DocumentManager.query_index still returns such a proxy.
+    response = document_manager.query_index(query_text, query_doc_id)._getvalue()
+    response_json = {
+        "text": str(response),
+    }
+    return make_response(jsonify(response_json)), 200
+
+
+@app.route("/uploadFile", methods=["POST"])
+def upload_file():
+    """Index an uploaded presentation, store it in S3 and return preview URLs.
+
+    The uploaded ``file`` is saved under ``documents/``, inserted into the
+    index (keyed by its filename when the ``filename_as_doc_id`` form field
+    is present, otherwise by a freshly generated uuid), uploaded to the
+    ``slidespeak-files`` bucket, and rendered to preview images that are
+    uploaded to the same bucket.
+
+    Returns:
+        400 when no ``file`` part is present, 500 with the error text when
+        saving or indexing fails, otherwise 200 with a JSON body holding
+        ``uuid`` and ``previewUrls`` (one entry per preview image that was
+        uploaded successfully, in slide order).
+    """
+    if "file" not in request.files:
+        return "Please send a POST request with a file", 400
+
+    filepath = None
     try:
-        # Wait for both subprocesses to finish
-        server.communicate()
-        flask_demo.communicate()
-    except KeyboardInterrupt:
-        # If the user presses CTRL+C, terminate both subprocesses
-        server.terminate()
-        flask_demo.terminate()
+        generated_uuid = str(uuid.uuid4())
+        uploaded_file = request.files["file"]
+        filename = secure_filename(uploaded_file.filename)
+        print('filename')
+        print(uploaded_file)
+        filepath = os.path.join("documents", os.path.basename(filename))
+
+        start_time = time.time()
+        uploaded_file.save(filepath)
+        print("Saving the local PPT file: {:.2f}s".format(time.time() - start_time))
+
+        start_time = time.time()
+        if request.form.get("filename_as_doc_id", None) is not None:
+            document_manager.insert_into_index(filepath, doc_id=filename)
+        else:
+            document_manager.insert_into_index(filepath, generated_uuid)
+        print(
+            "Inserted into llama index: {:.2f}s".format(time.time() - start_time)
+        )
+    except Exception as e:
+        print(e)
+        # cleanup temp file
+        if filepath is not None and os.path.exists(filepath):
+            os.remove(filepath)
+        return "Error: {}".format(str(e)), 500
+
+    try:
+        print('upload to s3')
+        # upload file to s3
+        start_time = time.time()
+        upload_file_to_s3(
+            filepath,
+            "slidespeak-files",
+            generated_uuid + os.path.splitext(filepath)[1],
+        )
+        print("Upload PPT to S3: {:.2f}s".format(time.time() - start_time))
+
+        start_time = time.time()
+        preview_file_paths = ppt_preview(
+            filepath, "preview_images/" + generated_uuid + ".jpg"
+        )
+        print("Generating PPT preview: {:.2f}s".format(time.time() - start_time))
+    finally:
+        # The local copy is no longer needed once the upload and preview
+        # steps are over; remove it even when one of them raised so failed
+        # requests do not pile up files under documents/.
+        if os.path.exists(filepath):
+            os.remove(filepath)
+
+    # Uploads run in slide order, so appending keeps previewUrls ordered
+    # without looking each path up again in the list.
+    preview_urls = []
+    for preview_file_path in preview_file_paths:
+        try:
+            preview_urls.append(
+                upload_file_to_s3(
+                    preview_file_path,
+                    "slidespeak-files",
+                    "preview-images/" + os.path.basename(preview_file_path),
+                )
+            )
+        except Exception as exc:
+            # Best effort: a failed preview is logged and left out of the
+            # response instead of failing the whole request.
+            print(f"{preview_file_path} generated an exception: {exc}")
+        finally:
+            # Drop the local image even when its upload failed.
+            try:
+                if os.path.exists(preview_file_path):
+                    os.remove(preview_file_path)
+            except OSError as exc:
+                print(f"{preview_file_path} generated an exception: {exc}")
+
+    print('i am going to return soon')
+    return (
+        make_response(jsonify({"uuid": generated_uuid, "previewUrls": preview_urls})),
+        200,
+    )
+
+
+@app.route("/")
+def home():
+    """Return a fixed plain-text greeting for the root URL."""
+    return "Hello, World! Welcome to the llama_index docker image!"
 
 
 if __name__ == "__main__":
-    main()
+    # Start Flask's built-in server on all interfaces, port 8000.
+    app.run(host="0.0.0.0", port=8000)
@@ -3,7 +3,6 @@
 import subprocess
 from pdf2image import convert_from_path
 
-
 def search_and_extract(zip_filepath, target_files, extract_to):
     # Ensure the target directory exists
     if not os.path.exists(extract_to):
@@ -31,10 +30,14 @@ def ppt_preview(ppt_file_path, preview_file_path):
 
     # Generate a temporary pdf path
     pdf_file_path = os.path.splitext(ppt_file_path)[0] + ".pdf"
-    print(pdf_file_path)
 
     # Convert PowerPoint to PDF using unoconv
     subprocess.run(["unoconv", "-f", "pdf", "-o", pdf_file_path, ppt_file_path])
+    if os.path.exists(pdf_file_path):
+        print(f"{pdf_file_path} exists!")
+    else:
+        print(f"{pdf_file_path} does not exist.")
+    print(pdf_file_path)
 
     # Convert PDF to list of images
     images = convert_from_path(pdf_file_path)
@@ -44,7 +47,5 @@ def ppt_preview(ppt_file_path, preview_file_path):
         fname = os.path.splitext(preview_file_path)[0] + f"-{i}.jpg"
         image.save(fname, "JPEG")
         preview_file_paths.append(fname)
-    # Save the first image (the first slide of the ppt) to the preview_file_path
-    # images[0].save(preview_file_path, "JPEG")
 
     return preview_file_paths