assist-feedback.php
10 months ago
assistant.php
6 months ago
base.php
6 months ago
dropped-file.php
6 months ago
edit-image.php
8 months ago
embed.php
7 months ago
feedback.php
10 months ago
function.php
8 months ago
image.php
8 months ago
parameter.php
1 year ago
text.php
6 months ago
transcribe.php
8 months ago
dropped-file.php
229 lines
| 1 | <?php |
| 2 | |
/**
 * A file attached to a query (an image, a PDF, an audio clip, ...).
 *
 * The file can be described in four different ways, identified by $type:
 *  - 'refId':            an ID resolved through $mwai_core->files (local path or public URL)
 *  - 'url':              an http(s) URL whose contents are downloaded on demand
 *  - 'data':             the raw bytes themselves
 *  - 'provider_file_id': an ID of a file stored on the AI provider's servers (reference only)
 *
 * Use the static from_*() factories rather than calling the constructor directly.
 */
class Meow_MWAI_Query_DroppedFile {
  private $data; // The payload; its meaning depends on $type (refId, URL, raw bytes or provider file ID)
  private $rawData; // Cache of the bytes loaded by get_raw_data() for 'refId' and 'url' files (null = not loaded)
  private $type; // Defines what the data is about ('refId', 'url', 'data' or 'provider_file_id')
  private $purpose; // 'analysis' or 'generated' (legacy purposes are mapped to 'analysis' by the constructor)
  private $mimeType; // 'image/jpeg' or any other mime type
  private $fileId; // The ID of the file in the database
  public $originalPath; // The original file path (for files loaded from disk)

  /**
   * Create a DroppedFile from a URL.
   *
   * @param string $url The URL of the file.
   * @param string $purpose 'analysis' or 'generated' (legacy values are accepted, see constructor).
   * @param string|null $mimeType Mime type; when empty it is looked up via Meow_MWAI_Core::get_mime_type().
   * @param mixed $fileId Optional ID of the file in the database.
   * @return Meow_MWAI_Query_DroppedFile
   * @throws Exception If the purpose is invalid.
   */
  public static function from_url( $url, $purpose, $mimeType = null, $fileId = null ) {
    if ( empty( $mimeType ) ) {
      $mimeType = Meow_MWAI_Core::get_mime_type( $url );
    }
    return new Meow_MWAI_Query_DroppedFile( $url, 'url', $purpose, $mimeType, $fileId );
  }

  /**
   * Create a DroppedFile from raw bytes.
   *
   * @param string $data The raw contents of the file.
   * @param string $purpose 'analysis' or 'generated' (legacy values are accepted, see constructor).
   * @param string|null $mimeType Mime type; it is NOT detected automatically here.
   * @return Meow_MWAI_Query_DroppedFile
   * @throws Exception If the purpose is invalid.
   */
  public static function from_data( $data, $purpose, $mimeType = null ) {
    return new Meow_MWAI_Query_DroppedFile( $data, 'data', $purpose, $mimeType );
  }

  /**
   * Create a DroppedFile by reading a file from disk.
   *
   * The contents are read immediately, so the resulting object is of type 'data';
   * the (sanitized) path is kept in $originalPath so get_filename() can use it.
   *
   * @param string $path Path of the file to read.
   * @param string $purpose 'analysis' or 'generated' (legacy values are accepted, see constructor).
   * @param string|null $mimeType Mime type; when empty it is looked up via Meow_MWAI_Core::get_mime_type().
   * @return Meow_MWAI_Query_DroppedFile
   * @throws Exception If the file cannot be read or the purpose is invalid.
   */
  public static function from_path( $path, $purpose, $mimeType = null ) {
    // Sanitize path to prevent PHAR deserialization attacks
    $path = Meow_MWAI_Core::sanitize_file_path( $path );
    $data = file_get_contents( $path );
    if ( $data === false ) {
      // Without this check, `false` would be stored as the file contents and later
      // encoded as an empty string. Only the basename is reported to avoid leaking server paths.
      throw new Exception( 'AI Engine: Failed to read file contents from path: ' . basename( (string) $path ) );
    }
    if ( empty( $mimeType ) ) {
      $mimeType = Meow_MWAI_Core::get_mime_type( $path );
    }
    $droppedFile = new Meow_MWAI_Query_DroppedFile( $data, 'data', $purpose, $mimeType );
    // Store the original path for filename extraction
    $droppedFile->originalPath = $path;
    return $droppedFile;
  }

  /**
   * Create a DroppedFile from a refId (resolved later through $mwai_core->files).
   *
   * @param string $refId The reference ID of the file.
   * @param string $purpose 'analysis' or 'generated' (legacy values are accepted, see constructor).
   * @param string|null $mimeType Mime type of the file.
   * @return Meow_MWAI_Query_DroppedFile
   * @throws Exception If the purpose is invalid.
   */
  public static function from_refId( $refId, $purpose, $mimeType = null ) {
    return new Meow_MWAI_Query_DroppedFile( $refId, 'refId', $purpose, $mimeType );
  }

  /**
   * Create DroppedFile from provider file_id reference (OpenAI, Anthropic, etc.)
   *
   * For PDFs uploaded to provider Files APIs, we only store the file_id
   * Examples: OpenAI 'file-xxx', Anthropic 'file_xxx'
   * These are reference-only - the file data lives on the provider's servers
   * Do NOT try to load file data from these - use get_refId() to get the file_id
   *
   * @param string $fileId The provider's file ID.
   * @param string $purpose 'analysis' or 'generated' (legacy values are accepted, see constructor).
   * @param string|null $mimeType Mime type of the file.
   * @return Meow_MWAI_Query_DroppedFile
   * @throws Exception If the purpose is invalid.
   */
  public static function from_provider_file_id( $fileId, $purpose, $mimeType = null ) {
    return new Meow_MWAI_Query_DroppedFile( $fileId, 'provider_file_id', $purpose, $mimeType );
  }

  /**
   * @deprecated Use from_provider_file_id() instead
   * TODO: Remove after March 2026 - Legacy method
   */
  public static function from_openai_file_id( $fileId, $purpose, $mimeType = null ) {
    return self::from_provider_file_id( $fileId, $purpose, $mimeType );
  }

  /**
   * @param mixed $data The payload (refId, URL, raw bytes or provider file ID, depending on $type).
   * @param string $type 'refId', 'url', 'data' or 'provider_file_id' ('openai_file_id' is still accepted).
   * @param string $purpose 'analysis' or 'generated'; the legacy values 'vision', 'files',
   *                        'transcription', 'code_execution' and 'assistant-in' are mapped to 'analysis'.
   * @param string|null $mimeType Mime type of the file.
   * @param mixed $fileId Optional ID of the file in the database.
   * @throws Exception If a non-empty $type or $purpose is not one of the supported values.
   */
  public function __construct( $data, $type, $purpose, $mimeType = null, $fileId = null ) {
    // TODO: Remove after March 2026 - Legacy type support
    if ( $type === 'openai_file_id' ) {
      $type = 'provider_file_id';
    }
    // An empty type/purpose is tolerated; only non-empty unknown values are rejected.
    if ( !empty( $type ) && !in_array( $type, [ 'refId', 'url', 'data', 'provider_file_id' ], true ) ) {
      throw new Exception( 'AI Engine: The file type can only be refId, url, data, or provider_file_id.' );
    }
    // TODO: Remove after March 2026 - Legacy purpose mapping
    $legacyPurposes = [ 'vision', 'files', 'transcription', 'code_execution', 'assistant-in' ];
    if ( in_array( $purpose, $legacyPurposes, true ) ) {
      $purpose = 'analysis';
    }
    if ( !empty( $purpose ) && $purpose !== 'analysis' && $purpose !== 'generated' ) {
      throw new Exception( 'AI Engine: The file purpose can only be analysis or generated.' );
    }
    $this->data = $data;
    $this->type = $type;
    $this->purpose = $purpose;
    $this->mimeType = $mimeType;
    $this->fileId = $fileId;
  }

  /**
   * Return the URL of a 'url' file.
   *
   * @return string
   * @throws Exception If the file is not of type 'url'.
   */
  public function get_url() {
    if ( $this->type === 'url' ) {
      return $this->data;
    }
    throw new Exception( 'AI Engine: The file is not an URL.' );
  }

  /**
   * Return the bytes of the file, loading (and caching) them if necessary.
   *
   * @return string
   * @throws Exception If the file is a provider file ID, has an unknown type, or cannot be loaded.
   */
  private function get_raw_data() {
    // Compare against null rather than using empty(): an empty file or the content "0"
    // are valid cached values and must not trigger a second read/download.
    if ( $this->rawData !== null ) {
      return $this->rawData;
    }
    if ( $this->type === 'provider_file_id' ) {
      // Provider file IDs are reference-only (file data lives on provider's servers)
      // Common mistake: trying to load file data for PDFs in conversation history
      // Fix: Check file mime type before calling get_data()/get_base64()/get_inline_base64_url()
      // For PDFs: use get_refId() to get the file_id string instead
      throw new Exception( 'AI Engine: Cannot get raw data for provider file ID (file_id: ' . $this->data . '). Use get_refId() instead.' );
    }
    if ( $this->type === 'data' ) {
      return $this->data;
    }
    if ( $this->type === 'refId' ) {
      $this->rawData = $this->load_ref_contents();
      return $this->rawData;
    }
    if ( $this->type === 'url' ) {
      $this->rawData = $this->download_contents(
        $this->data,
        'AI Engine: Failed to download file contents from URL: ' . $this->data
      );
      return $this->rawData;
    }
    throw new Exception( 'AI Engine: The file is not data or an URL.' );
  }

  /**
   * Load the bytes of a 'refId' file, from disk when possible, otherwise from its public URL.
   *
   * @return string
   * @throws Exception If neither the local path nor the URL yields the contents.
   */
  private function load_ref_contents() {
    global $mwai_core;

    // Prefer loading from disk to avoid HTTP rewrites or CDN issues
    $path = $mwai_core->files->get_path( $this->data );
    if ( !empty( $path ) ) {
      $path = Meow_MWAI_Core::sanitize_file_path( $path );
      if ( file_exists( $path ) && is_readable( $path ) ) {
        $contents = file_get_contents( $path );
        if ( $contents === false ) {
          throw new Exception( 'AI Engine: Failed to read file contents for refId: ' . $this->data );
        }
        return $contents;
      }
    }

    // Fallback to the public URL if the local path is unavailable
    $url = $mwai_core->files->get_url( $this->data );
    if ( empty( $url ) ) {
      throw new Exception( 'AI Engine: Could not find file URL for refId: ' . $this->data );
    }
    return $this->download_contents(
      $url,
      'AI Engine: Failed to download file contents for refId: ' . $this->data
    );
  }

  /**
   * Download the contents of an http(s) URL.
   *
   * @param string $url The URL to download.
   * @param string $failureMessage Message of the exception thrown when the download fails.
   * @return string
   * @throws Exception If the scheme is not http/https or the download fails.
   */
  private function download_contents( $url, $failureMessage ) {
    // Only http/https are allowed so that other stream wrappers (file://, phar://, ...)
    // can never be reached through file_get_contents().
    // NOTE(review): this does not restrict the target host; confirm whether callers
    // already validate URLs, or consider the WordPress safe HTTP API.
    $parts = wp_parse_url( $url );
    if ( !isset( $parts['scheme'] ) || !in_array( $parts['scheme'], [ 'http', 'https' ], true ) ) {
      throw new Exception( 'Invalid URL scheme; only HTTP/HTTPS allowed.' );
    }
    $contents = file_get_contents( $url );
    if ( $contents === false ) {
      throw new Exception( $failureMessage );
    }
    return $contents;
  }

  /**
   * Return the bytes of the file.
   *
   * @return string
   * @throws Exception If the file is a provider file ID, has an unknown type, or cannot be loaded.
   */
  public function get_data() {
    if ( $this->type === 'provider_file_id' ) {
      // Provider file IDs are just references, no data loading needed
      throw new Exception( 'AI Engine: Cannot get data for provider file ID. Use get_refId() instead.' );
    }
    if ( $this->type === 'refId' || $this->type === 'url' ) {
      return $this->get_raw_data();
    }
    if ( $this->type === 'data' ) {
      return $this->data;
    }
    throw new Exception( 'AI Engine: The file is not data or an URL.' );
  }

  /**
   * Return the bytes of the file, base64-encoded.
   *
   * @return string
   * @throws Exception If the bytes cannot be obtained (see get_raw_data()).
   */
  public function get_base64() {
    return base64_encode( $this->get_raw_data() );
  }

  /**
   * Return the file as a data URI, something like "data:image/jpeg;base64,{data}".
   *
   * @return string
   * @throws Exception If the bytes cannot be obtained (see get_raw_data()).
   */
  public function get_inline_base64_url() {
    $b64 = $this->get_base64();
    return "data:{$this->mimeType};base64,{$b64}";
  }

  /** @return string One of 'refId', 'url', 'data' or 'provider_file_id'. */
  public function get_type() {
    return $this->type;
  }

  /** @return string 'analysis' or 'generated'. */
  public function get_purpose() {
    return $this->purpose;
  }

  /** @return string|null The mime type given at construction, if any. */
  public function get_mimeType() {
    return $this->mimeType;
  }

  /**
   * Tell whether the mime type designates an image (i.e. contains 'image').
   *
   * @return bool False when no mime type is known.
   */
  public function is_image() {
    // Guard against a null mime type: passing null to strpos() is deprecated since PHP 8.1.
    return is_string( $this->mimeType ) && strpos( $this->mimeType, 'image' ) !== false;
  }

  /** @return mixed The ID of the file in the database, if one was given. */
  public function get_fileId() {
    return $this->fileId;
  }

  /**
   * Return the reference held by this file: the refId, or the provider file ID.
   *
   * @return string|null Null for 'url' and 'data' files.
   */
  public function get_refId() {
    if ( $this->type === 'refId' || $this->type === 'provider_file_id' ) {
      return $this->data;
    }
    return null;
  }

  /**
   * Return a filename for this file.
   *
   * Resolution order: the basename of the original path (files created with from_path()),
   * then the basename of the refId's local path or of the URL's path. When none of these
   * gives a name, a generic one is built from the mime type ('file.<subtype>'), or
   * 'file.bin' when the mime type is unknown. Other types (provider file IDs) get 'file'.
   *
   * @return string
   */
  public function get_filename() {
    // If we have an original path (from from_path), use its basename
    if ( !empty( $this->originalPath ) ) {
      return basename( $this->originalPath );
    }

    // The (string) casts cover a missing path (null/false), which basename() would
    // otherwise receive directly (deprecated since PHP 8.1).
    $name = '';
    if ( $this->type === 'refId' ) {
      global $mwai_core;
      $name = basename( (string) $mwai_core->files->get_path( $this->data ) );
    }
    elseif ( $this->type === 'url' ) {
      $name = basename( (string) parse_url( $this->data, PHP_URL_PATH ) );
    }
    if ( $name !== '' ) {
      return $name;
    }

    // No usable name: generate a generic one based on the mime type.
    if ( in_array( $this->type, [ 'refId', 'url', 'data' ], true ) ) {
      if ( !empty( $this->mimeType ) ) {
        $parts = explode( '/', $this->mimeType );
        return 'file.' . end( $parts );
      }
      return 'file.bin';
    }
    return 'file';
  }
}
| 229 |