import os
import pdfplumber
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
from werkzeug.utils import secure_filename

# Flask application object; the upload route in this module is registered on it.
app = Flask(__name__)
# Enable CORS handling for the app. No options are passed, so the flask_cors
# defaults apply (no origin restrictions are configured here).
CORS(app)

# === Config ===
# Working directories. Each one can be overridden through the environment
# variable of the same name; otherwise a folder under the current working
# directory is used.
UPLOAD_DIR = os.environ.get(
    "UPLOAD_DIR", os.path.join(os.getcwd(), "patternpy_pdf_files")
)
OUTPUT_DIR = os.environ.get(
    "OUTPUT_DIR", os.path.join(os.getcwd(), "patternpy_text_output")
)

# Lower-case file extensions (without the dot) accepted for upload.
ALLOWED_EXTENSIONS = {"pdf"}

# Marker line written to the output where a horizontal border is detected.
BORDER_MARKER = "!@#"

# Create both directories at import time; existing ones are left untouched.
for _directory in (UPLOAD_DIR, OUTPUT_DIR):
    os.makedirs(_directory, exist_ok=True)


# === Helpers ===
def allowed_file(filename, allowed_extensions=None):
    """Return True when *filename* ends in a permitted extension.

    The extension is the text after the last dot, compared case-insensitively.

    Args:
        filename: File name to check. ``None`` or an empty string is
            rejected (returns False) instead of raising.
        allowed_extensions: Optional collection of lower-case extensions
            without the leading dot. When omitted, the module-level
            ``ALLOWED_EXTENSIONS`` is used.

    Returns:
        bool: True if the name has an extension and it is permitted.
    """
    # A missing name or a name without any dot has no extension to check.
    if not filename or "." not in filename:
        return False

    if allowed_extensions is None:
        allowed_extensions = ALLOWED_EXTENSIONS

    extension = filename.rsplit(".", 1)[1].lower()
    return extension in allowed_extensions


def detect_horizontal_borders(page):
    """Collect the Y positions of horizontal borders found on a page.

    Two sources are combined:
      * entries of ``page.lines`` whose ``top`` and ``bottom`` differ by
        less than 1 (treated as horizontal) contribute their ``top``;
      * every entry of ``page.rects`` contributes both its ``top`` and
        its ``bottom``.

    Returns:
        set: The collected positions, each rounded to one decimal place.
    """
    line_edges = {
        round(segment["top"], 1)
        for segment in page.lines
        if abs(segment["top"] - segment["bottom"]) < 1
    }
    rect_edges = {
        round(rect[side], 1)
        for rect in page.rects
        for side in ("top", "bottom")
    }
    return line_edges | rect_edges


# === Route ===
def _format_line(line_words, column_gap=40, space_width=4):
    """Render the words of one visual line as a single text string.

    Words are ordered left to right by ``x0``. Before each word, the
    horizontal gap to the previous word's ``x1`` is converted to
    ``gap / space_width`` spaces (at least one); a gap larger than
    ``column_gap`` additionally inserts a `` || `` column separator.

    The gap of the first word is measured from x = 0, so a line whose first
    word starts more than ``column_gap`` units from the left edge begins with
    the separator. NOTE(review): confirm consumers of the output rely on
    this before changing it.
    """
    pieces = []
    last_x = 0

    for word in sorted(line_words, key=lambda item: item["x0"]):
        gap = word["x0"] - last_x

        # Long space -> column separator
        if gap > column_gap:
            pieces.append(" || ")

        # Normal spacing, proportional to the gap
        pieces.append(" " * max(int(gap / space_width), 1))
        pieces.append(word["text"])
        last_x = word["x1"]

    return "".join(pieces).strip()


def _extract_page_lines(page):
    """Return the output lines for one page, without the page markers.

    Words are grouped into lines by their ``top`` coordinate rounded to one
    decimal place, and the lines are emitted top to bottom. A
    ``BORDER_MARKER`` line is emitted before any text line that lies within
    2 units of a detected horizontal border.
    """
    words = page.extract_words(x_tolerance=2, y_tolerance=3)
    horizontal_borders = detect_horizontal_borders(page)

    # Group words by Y position
    words_by_top = {}
    for word in words:
        words_by_top.setdefault(round(word["top"], 1), []).append(word)

    rendered = []
    for y, line_words in sorted(words_by_top.items(), key=lambda item: item[0]):
        # At most one marker per text line, however many borders are close.
        if any(abs(border_y - y) < 2 for border_y in horizontal_borders):
            rendered.append(BORDER_MARKER)
        rendered.append(_format_line(line_words))

    return rendered


@app.route("/patternpy", methods=["POST"])
def extract_pdf_text():
    """Convert an uploaded PDF to marked-up text and return it as a download.

    Expects a multipart POST with the PDF in the ``file`` field. The upload
    is saved under ``UPLOAD_DIR``, every page is rendered between
    ``<<<PAGE_n_START>>>`` / ``<<<PAGE_n_END>>>`` markers, and the result is
    written to ``OUTPUT_DIR`` as ``<name>_output.txt`` and sent back as an
    attachment.

    Returns:
        The text file on success; a JSON ``{"error": ...}`` body with status
        400 for a missing, unnamed or non-PDF upload, or 500 when saving,
        parsing or writing fails.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided"}), 400

    file = request.files["file"]

    # Covers both an empty and a missing (None) filename.
    if not file.filename:
        return jsonify({"error": "No selected file"}), 400

    if not allowed_file(file.filename):
        return jsonify({"error": "Only PDF files are allowed"}), 400

    filename = secure_filename(file.filename)
    # secure_filename may return an empty string; joining that with
    # UPLOAD_DIR would point at the directory itself.
    if not filename:
        return jsonify({"error": "Invalid file name"}), 400

    filepath = os.path.join(UPLOAD_DIR, filename)
    output_lines = []

    try:
        # Saving is inside the error boundary so a failed write is reported
        # through the same JSON error path as a parsing failure.
        file.save(filepath)

        with pdfplumber.open(filepath) as pdf:
            for page_no, page in enumerate(pdf.pages, start=1):
                page_lines = _extract_page_lines(page)
                output_lines.append(f"\n<<<PAGE_{page_no}_START>>>\n")
                output_lines.extend(page_lines)
                output_lines.append(f"\n<<<PAGE_{page_no}_END>>>\n")

        # Save output file
        txt_filename = os.path.splitext(filename)[0] + "_output.txt"
        txt_path = os.path.join(OUTPUT_DIR, txt_filename)

        with open(txt_path, "w", encoding="utf-8") as f:
            f.write("\n".join(output_lines))

        return send_file(txt_path, as_attachment=True)

    except Exception as e:
        # Top-level request boundary: record the traceback server-side and
        # report the failure to the client as JSON.
        app.logger.exception("PDF text extraction failed for %s", filename)
        return jsonify({"error": str(e)}), 500


# === Run ===
if __name__ == "__main__":
    # Development-server entry point. Bind address and port are read from the
    # HOST / PORT environment variables and fall back to the values that were
    # previously hard-coded here. The port is converted to an int.
    run_host = os.getenv("HOST", "165.22.220.143")
    run_port = int(os.getenv("PORT", "2020"))

    # Debug mode is opt-in (FLASK_DEBUG=1/true/yes/on). It enables the
    # interactive debugger, which must not be reachable on a public address.
    run_debug = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}

    app.run(debug=run_debug, host=run_host, port=run_port)