assist-feedback.php
10 months ago
assistant.php
6 months ago
base.php
6 months ago
dropped-file.php
5 months ago
edit-image.php
8 months ago
embed.php
7 months ago
feedback.php
10 months ago
function.php
8 months ago
image.php
6 months ago
parameter.php
1 year ago
text.php
6 months ago
transcribe.php
8 months ago
dropped-file.php
272 lines
| 1 | <?php |
| 2 | |
/**
 * Represents a file attached to a query.
 *
 * The payload held in $data is interpreted according to $type:
 *  - 'refId':            an internal file reference, resolved through $mwai_core->files
 *  - 'url':              a URL the content can be read from
 *  - 'data':             the raw file contents themselves
 *  - 'provider_file_id': a file ID on a provider's servers (reference only, no local data)
 */
class Meow_MWAI_Query_DroppedFile {
  private $data;
  private $rawData = null; // Cached raw contents once loaded (null = not loaded yet)
  private $type; // Defines what the data is about ('refId', 'url', 'data' or 'provider_file_id')
  private $purpose; // 'analysis' or 'generated' (legacy values are mapped to 'analysis' by the constructor)
  private $mimeType; // 'image/jpeg' or any other mime type
  private $fileId; // The ID of the file in the database
  public $originalPath; // The original file path (for files loaded from disk)

  /**
   * Fetch content from a URL, handling internal vs external URLs differently.
   * Internal URLs (same host as the site) use wp_remote_get to avoid SSRF blocking issues.
   * External URLs use wp_safe_remote_get for SSRF protection, with redirects disabled.
   *
   * @param string $url The HTTP/HTTPS URL to download.
   * @return string The response body (never an empty string).
   * @throws Exception If the scheme is not HTTP/HTTPS, the request fails, or the body is empty.
   */
  private static function fetch_url_content( $url ) {
    $parts = wp_parse_url( $url );
    if ( !isset( $parts['scheme'] ) || !in_array( $parts['scheme'], [ 'http', 'https' ], true ) ) {
      throw new Exception( 'Invalid URL scheme; only HTTP/HTTPS allowed.' );
    }

    // Check if internal URL by comparing hostnames (handles http/https mismatch)
    $site_host = wp_parse_url( get_site_url(), PHP_URL_HOST );
    $url_host = isset( $parts['host'] ) ? $parts['host'] : null;
    $is_internal = ( $site_host === $url_host );

    if ( $is_internal ) {
      // NOTE(review): certificate verification is disabled and redirects are followed
      // here; a same-host redirect to another host would bypass the SSRF protection
      // used below. Presumably deliberate for local/self-signed setups; to be confirmed.
      $response = wp_remote_get( $url, [ 'timeout' => 60, 'sslverify' => false ] );
    }
    else {
      // SSRF protection for external URLs
      $response = wp_safe_remote_get( $url, [ 'timeout' => 60, 'redirection' => 0 ] );
    }

    if ( is_wp_error( $response ) ) {
      throw new Exception( 'AI Engine: Failed to download file: ' . $response->get_error_message() );
    }

    // Compare against '' explicitly: empty() would also reject a valid body of "0".
    $body = wp_remote_retrieve_body( $response );
    if ( !is_string( $body ) || $body === '' ) {
      throw new Exception( 'AI Engine: Failed to download file contents from URL.' );
    }

    return $body;
  }

  /**
   * Create a DroppedFile pointing at a URL.
   * When no mime type is given, it is resolved with Meow_MWAI_Core::get_mime_type().
   */
  public static function from_url( $url, $purpose, $mimeType = null, $fileId = null ) {
    if ( empty( $mimeType ) ) {
      $mimeType = Meow_MWAI_Core::get_mime_type( $url );
    }
    return new Meow_MWAI_Query_DroppedFile( $url, 'url', $purpose, $mimeType, $fileId );
  }

  /**
   * Create a DroppedFile from raw contents already in memory.
   */
  public static function from_data( $data, $purpose, $mimeType = null ) {
    return new Meow_MWAI_Query_DroppedFile( $data, 'data', $purpose, $mimeType );
  }

  /**
   * Create a DroppedFile by reading a file from disk.
   * The resulting object is of type 'data' and remembers the path in $originalPath.
   *
   * @throws Exception If the file cannot be read.
   */
  public static function from_path( $path, $purpose, $mimeType = null ) {
    // Sanitize path to prevent PHAR deserialization attacks
    $path = Meow_MWAI_Core::sanitize_file_path( $path );
    $data = file_get_contents( $path );
    if ( $data === false ) {
      // Fail loudly instead of building an object whose data is `false`.
      throw new Exception( 'AI Engine: Failed to read file contents from path: ' . basename( (string) $path ) );
    }
    if ( empty( $mimeType ) ) {
      $mimeType = Meow_MWAI_Core::get_mime_type( $path );
    }
    $droppedFile = new Meow_MWAI_Query_DroppedFile( $data, 'data', $purpose, $mimeType );
    // Store the original path for filename extraction
    $droppedFile->originalPath = $path;
    return $droppedFile;
  }

  /**
   * Create a DroppedFile from an internal file reference ID.
   */
  public static function from_refId( $refId, $purpose, $mimeType = null ) {
    return new Meow_MWAI_Query_DroppedFile( $refId, 'refId', $purpose, $mimeType );
  }

  /**
   * Create DroppedFile from provider file_id reference (OpenAI, Anthropic, etc.)
   *
   * For PDFs uploaded to provider Files APIs, we only store the file_id
   * Examples: OpenAI 'file-xxx', Anthropic 'file_xxx'
   * These are reference-only - the file data lives on the provider's servers
   * Do NOT try to load file data from these - use get_refId() to get the file_id
   */
  public static function from_provider_file_id( $fileId, $purpose, $mimeType = null ) {
    return new Meow_MWAI_Query_DroppedFile( $fileId, 'provider_file_id', $purpose, $mimeType );
  }

  /**
   * @deprecated Use from_provider_file_id() instead
   * TODO: Remove after March 2026 - Legacy method
   */
  public static function from_openai_file_id( $fileId, $purpose, $mimeType = null ) {
    return self::from_provider_file_id( $fileId, $purpose, $mimeType );
  }

  /**
   * @param mixed       $data     The payload; its meaning depends on $type.
   * @param string      $type     'refId', 'url', 'data' or 'provider_file_id' ('openai_file_id' is accepted as a legacy alias).
   * @param string      $purpose  'analysis' or 'generated'; legacy purposes are mapped to 'analysis'.
   * @param string|null $mimeType The mime type of the file, if known.
   * @param mixed       $fileId   The ID of the file in the database, if any.
   * @throws Exception If a non-empty $type or $purpose is not one of the supported values.
   */
  public function __construct( $data, $type, $purpose, $mimeType = null, $fileId = null ) {
    // TODO: Remove after March 2026 - Legacy type support
    if ( $type === 'openai_file_id' ) {
      $type = 'provider_file_id';
    }
    $allowedTypes = [ 'refId', 'url', 'data', 'provider_file_id' ];
    if ( !empty( $type ) && !in_array( $type, $allowedTypes, true ) ) {
      throw new Exception( 'AI Engine: The file type can only be refId, url, data, or provider_file_id.' );
    }
    // TODO: Remove after March 2026 - Legacy purpose mapping
    $legacyPurposes = [ 'vision', 'files', 'transcription', 'code_execution', 'assistant-in' ];
    if ( in_array( $purpose, $legacyPurposes, true ) ) {
      $purpose = 'analysis';
    }
    if ( !empty( $purpose ) && !in_array( $purpose, [ 'analysis', 'generated' ], true ) ) {
      throw new Exception( 'AI Engine: The file purpose can only be analysis or generated.' );
    }
    $this->data = $data;
    $this->type = $type;
    $this->purpose = $purpose;
    $this->mimeType = $mimeType;
    $this->fileId = $fileId;
  }

  /**
   * Return the URL of a 'url' file.
   *
   * @throws Exception If the file is not of type 'url'.
   */
  public function get_url() {
    if ( $this->type === 'url' ) {
      return $this->data;
    }
    throw new Exception( 'AI Engine: The file is not an URL.' );
  }

  /**
   * Load the contents behind a 'refId' file: from the local path when it is
   * available and readable, otherwise by downloading the file's URL.
   *
   * @return string
   * @throws Exception If the file can be neither read from disk nor located by URL.
   */
  private function load_ref_data() {
    global $mwai_core;

    // Prefer loading from disk to avoid HTTP rewrites or CDN issues
    $path = $mwai_core->files->get_path( $this->data );
    if ( !empty( $path ) ) {
      $path = Meow_MWAI_Core::sanitize_file_path( $path );
      if ( file_exists( $path ) && is_readable( $path ) ) {
        $contents = file_get_contents( $path );
        if ( $contents === false ) {
          throw new Exception( 'AI Engine: Failed to read file contents for refId: ' . $this->data );
        }
        return $contents;
      }
    }

    // Fallback to the public URL if the local path is unavailable
    $url = $mwai_core->files->get_url( $this->data );
    if ( empty( $url ) ) {
      throw new Exception( 'AI Engine: Could not find file URL for refId: ' . $this->data );
    }
    return self::fetch_url_content( $url );
  }

  /**
   * Try to read a same-host URL straight from the uploads directory.
   *
   * Only files whose resolved path lies inside the uploads base directory are read,
   * so a URL containing '..' segments (or one that merely shares the uploads URL as
   * a string prefix) cannot be used to read files elsewhere on disk.
   *
   * @param string $url The URL to map to a local file.
   * @return string|null The file contents, or null when the URL cannot be served from disk.
   */
  private static function read_local_upload( $url ) {
    $site_host = wp_parse_url( get_site_url(), PHP_URL_HOST );
    $url_host = wp_parse_url( $url, PHP_URL_HOST );
    if ( $site_host !== $url_host ) {
      return null;
    }

    $upload_dir = wp_upload_dir();
    // Normalize protocols for comparison (http vs https)
    $normalized_url = preg_replace( '/^https?:/', '', $url );
    $normalized_upload_url = preg_replace( '/^https?:/', '', $upload_dir['baseurl'] );
    if ( empty( $normalized_upload_url ) || strpos( $normalized_url, $normalized_upload_url ) !== 0 ) {
      return null;
    }

    // Swap only the leading uploads URL for the uploads directory (not every occurrence).
    $local_path = $upload_dir['basedir'] . substr( $normalized_url, strlen( $normalized_upload_url ) );
    $local_path = Meow_MWAI_Core::sanitize_file_path( $local_path );
    if ( empty( $local_path ) ) {
      return null;
    }

    // Resolve '..' and symlinks, then require the result to stay inside the uploads directory.
    $real_path = realpath( $local_path );
    $real_base = realpath( $upload_dir['basedir'] );
    if ( $real_path === false || $real_base === false ) {
      return null;
    }
    $base_prefix = rtrim( $real_base, '/\\' ) . DIRECTORY_SEPARATOR;
    if ( strpos( $real_path, $base_prefix ) !== 0 ) {
      return null;
    }
    if ( !is_file( $real_path ) || !is_readable( $real_path ) ) {
      return null;
    }

    $contents = file_get_contents( $real_path );
    return $contents === false ? null : $contents;
  }

  /**
   * Return the raw contents of the file, loading (and caching) them when needed.
   *
   * @return mixed The raw contents (for 'data' files, whatever was given to the constructor).
   * @throws Exception For provider file IDs, unknown types, or when loading fails.
   */
  private function get_raw_data() {
    // A null sentinel (rather than empty()) lets contents such as "0" or "" be cached too.
    if ( $this->rawData !== null ) {
      return $this->rawData;
    }
    if ( $this->type === 'provider_file_id' ) {
      // Provider file IDs are reference-only (file data lives on provider's servers)
      // Common mistake: trying to load file data for PDFs in conversation history
      // Fix: Check file mime type before calling get_data()/get_base64()/get_inline_base64_url()
      // For PDFs: use get_refId() to get the file_id string instead
      throw new Exception( 'AI Engine: Cannot get raw data for provider file ID (file_id: ' . $this->data . '). Use get_refId() instead.' );
    }
    if ( $this->type === 'data' ) {
      return $this->data;
    }
    if ( $this->type === 'refId' ) {
      $this->rawData = $this->load_ref_data();
      return $this->rawData;
    }
    if ( $this->type === 'url' ) {
      // For internal URLs, try to read from disk first (more efficient)
      $contents = self::read_local_upload( $this->data );
      if ( $contents === null ) {
        // Fetch via HTTP (handles internal vs external URLs with SSRF protection)
        $contents = self::fetch_url_content( $this->data );
      }
      // Assign only once loading succeeded, so a failure never leaves a bogus cached value.
      $this->rawData = $contents;
      return $this->rawData;
    }
    throw new Exception( 'AI Engine: The file is not data or an URL.' );
  }

  /**
   * Return the contents of the file.
   *
   * @throws Exception For provider file IDs, unknown types, or when loading fails.
   */
  public function get_data() {
    if ( $this->type === 'provider_file_id' ) {
      // Provider file IDs are just references, no data loading needed
      throw new Exception( 'AI Engine: Cannot get data for provider file ID. Use get_refId() instead.' );
    }
    if ( $this->type === 'refId' || $this->type === 'url' ) {
      return $this->get_raw_data();
    }
    if ( $this->type === 'data' ) {
      return $this->data;
    }
    throw new Exception( 'AI Engine: The file is not data or an URL.' );
  }

  /**
   * Return the contents of the file encoded as base64.
   */
  public function get_base64() {
    return base64_encode( $this->get_raw_data() );
  }

  // Will return something like "data:image/jpeg;base64,{data}"
  public function get_inline_base64_url() {
    $b64 = $this->get_base64();
    return "data:{$this->mimeType};base64,{$b64}";
  }

  public function get_type() {
    return $this->type;
  }

  public function get_purpose() {
    return $this->purpose;
  }

  public function get_mimeType() {
    return $this->mimeType;
  }

  /**
   * Whether the mime type contains 'image'. Returns false when no mime type is set.
   */
  public function is_image() {
    // Guard against a missing mime type: passing null to strpos() is deprecated since PHP 8.1.
    return is_string( $this->mimeType ) && strpos( $this->mimeType, 'image' ) !== false;
  }

  public function get_fileId() {
    return $this->fileId;
  }

  /**
   * Return the reference held by a 'refId' or 'provider_file_id' file, null otherwise.
   */
  public function get_refId() {
    if ( $this->type === 'refId' || $this->type === 'provider_file_id' ) {
      return $this->data;
    }
    return null;
  }

  // Return a filename for this file. If the file is an URL, use the basename of
  // its path. If the file is raw data, generate a generic name based on the mime type.
  // An empty string is returned when a refId or URL yields no usable path.
  public function get_filename() {
    // If we have an original path (from from_path), use its basename
    if ( !empty( $this->originalPath ) ) {
      return basename( $this->originalPath );
    }
    if ( $this->type === 'refId' ) {
      global $mwai_core;
      $path = $mwai_core->files->get_path( $this->data );
      // basename( null ) is deprecated since PHP 8.1; keep the historical '' result.
      return is_string( $path ) ? basename( $path ) : '';
    }
    if ( $this->type === 'url' ) {
      $path = parse_url( $this->data, PHP_URL_PATH );
      return is_string( $path ) ? basename( $path ) : '';
    }
    if ( $this->type === 'data' ) {
      if ( !empty( $this->mimeType ) ) {
        $parts = explode( '/', $this->mimeType );
        $ext = end( $parts );
        return 'file.' . $ext;
      }
      return 'file.bin';
    }
    return 'file';
  }
}
| 272 |