merge: follow-up to !390 (!397)

View MR for information: https://activitypub.software/TransFem-org/Sharkey/-/merge_requests/397 Approved-by: Amelia Yukii <amelia.yukii@shourai.de> Approved-by: Marie <marie@kaifa.ch>
2025-01-22 22:43:09 +02:00 · 2024-02-03 17:05:21 +00:00 · 2024-02-03 17:05:21 +00:00 · 65a7623944
commit 65a7623944
parent b2d520369f 4bc517ca89
1 changed files with 28 additions and 32 deletions
--- a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts
+++ b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts
@ -1,4 +1,5 @@
 import * as fs from 'node:fs';
+import * as fsp from 'node:fs/promises';
 import * as vm from 'node:vm';
 import * as crypto from 'node:crypto';
 import { Inject, Injectable } from '@nestjs/common';
@ -51,7 +52,7 @@ export class ImportNotesProcessorService {

 	@bindThis
 	private async uploadFiles(dir: string, user: MiUser, folder?: MiDriveFolder['id']) {
-		const fileList = fs.readdirSync(dir);
+		const fileList = await fsp.readdir(dir);
 		for await (const file of fileList) {
 			const name = `${dir}/${file}`;
 			if (fs.statSync(name).isDirectory()) {
@ -130,14 +131,16 @@ export class ImportNotesProcessorService {
 		return typeof obj[Symbol.iterator] === 'function';
 	}

-	private parseTwitterFile(str : string) : null | [{ tweet: any }] {
-		const removed = str.replace(new RegExp('window\\.YTD\\.tweets\\.part0 = ', 'g'), '');
-	
+	@bindThis
+	private parseTwitterFile(str : string) : { tweet: object }[] {
+		const jsonStr = str.replace(/^\s*window\.YTD\.tweets\.part0\s*=\s*/, '');
+
 		try {
-			return JSON.parse(removed);
+			return JSON.parse(jsonStr);
 		} catch (error) {
 			//The format is not what we expected. Either this file was tampered with or twitters exports changed
-			return null;
+			this.logger.warn('Failed to import twitter notes due to malformed file');
+			throw error;
 		}
 	}

@ -173,7 +176,7 @@ export class ImportNotesProcessorService {
 			const destPath = path + '/twitter.zip';

 			try {
-				fs.writeFileSync(destPath, '', 'binary');
+				await fsp.writeFile(destPath, '', 'binary');
 				await this.downloadService.downloadUrl(file.url, destPath);
 			} catch (e) { // TODO: 何度か再試行
 				if (e instanceof Error || typeof e === 'string') {
@ -185,21 +188,13 @@ export class ImportNotesProcessorService {
 			const outputPath = path + '/twitter';
 			try {
 				this.logger.succ(`Unzipping to ${outputPath}`);
-				ZipReader.withDestinationPath(outputPath).viaBuffer(await fs.promises.readFile(destPath));
+				ZipReader.withDestinationPath(outputPath).viaBuffer(await fsp.readFile(destPath));

-				const unprocessedTweetJson = this.parseTwitterFile(fs.readFileSync(outputPath + '/data/tweets.js', 'utf-8'));
+				const unprocessedTweets = this.parseTwitterFile(await fsp.readFile(outputPath + '/data/tweets.js', 'utf-8'));

-				//Make sure that it isnt null (because if something went wrong in parseTwitterFile it returns null)
-				if (unprocessedTweetJson) {
-					const tweets = Object.keys(unprocessedTweetJson).reduce((m, key, i, obj) => {
-						return m.concat(unprocessedTweetJson[i].tweet);
-					}, []);
-
-					const processedTweets = await this.recreateChain(['id_str'], ['in_reply_to_status_id_str'], tweets, false);
-					this.queueService.createImportTweetsToDbJob(job.data.user, processedTweets, null);
-				} else {
-					this.logger.warn('Failed to import twitter notes due to malformed file');
-				}
+				const tweets = unprocessedTweets.map(e => e.tweet);
+				const processedTweets = await this.recreateChain(['id_str'], ['in_reply_to_status_id_str'], tweets, false);
+				this.queueService.createImportTweetsToDbJob(job.data.user, processedTweets, null);
 			} finally {
 				cleanup();
 			}
@ -211,7 +206,7 @@ export class ImportNotesProcessorService {
 			const destPath = path + '/facebook.zip';

 			try {
-				fs.writeFileSync(destPath, '', 'binary');
+				await fsp.writeFile(destPath, '', 'binary');
 				await this.downloadService.downloadUrl(file.url, destPath);
 			} catch (e) { // TODO: 何度か再試行
 				if (e instanceof Error || typeof e === 'string') {
@ -223,8 +218,8 @@ export class ImportNotesProcessorService {
 			const outputPath = path + '/facebook';
 			try {
 				this.logger.succ(`Unzipping to ${outputPath}`);
-				ZipReader.withDestinationPath(outputPath).viaBuffer(await fs.promises.readFile(destPath));
-				const postsJson = fs.readFileSync(outputPath + '/your_activity_across_facebook/posts/your_posts__check_ins__photos_and_videos_1.json', 'utf-8');
+				ZipReader.withDestinationPath(outputPath).viaBuffer(await fsp.readFile(destPath));
+				const postsJson = await fsp.readFile(outputPath + '/your_activity_across_facebook/posts/your_posts__check_ins__photos_and_videos_1.json', 'utf-8');
 				const posts = JSON.parse(postsJson);
 				const facebookFolder = await this.driveFoldersRepository.findOneBy({ name: 'Facebook', userId: job.data.user.id, parentId: folder?.id });
 				if (facebookFolder == null && folder) {
@ -244,7 +239,7 @@ export class ImportNotesProcessorService {
 			const destPath = path + '/unknown.zip';

 			try {
-				fs.writeFileSync(destPath, '', 'binary');
+				await fsp.writeFile(destPath, '', 'binary');
 				await this.downloadService.downloadUrl(file.url, destPath);
 			} catch (e) { // TODO: 何度か再試行
 				if (e instanceof Error || typeof e === 'string') {
@ -256,11 +251,11 @@ export class ImportNotesProcessorService {
 			const outputPath = path + '/unknown';
 			try {
 				this.logger.succ(`Unzipping to ${outputPath}`);
-				ZipReader.withDestinationPath(outputPath).viaBuffer(await fs.promises.readFile(destPath));
+				ZipReader.withDestinationPath(outputPath).viaBuffer(await fsp.readFile(destPath));
 				const isInstagram = type === 'Instagram' || fs.existsSync(outputPath + '/instagram_live') || fs.existsSync(outputPath + '/instagram_ads_and_businesses');
 				const isOutbox = type === 'Mastodon' || fs.existsSync(outputPath + '/outbox.json');
 				if (isInstagram) {
-					const postsJson = fs.readFileSync(outputPath + '/content/posts_1.json', 'utf-8');
+					const postsJson = await fsp.readFile(outputPath + '/content/posts_1.json', 'utf-8');
 					const posts = JSON.parse(postsJson);
 					const igFolder = await this.driveFoldersRepository.findOneBy({ name: 'Instagram', userId: job.data.user.id, parentId: folder?.id });
 					if (igFolder == null && folder) {
@ -270,16 +265,16 @@ export class ImportNotesProcessorService {
 					}
 					this.queueService.createImportIGToDbJob(job.data.user, posts);
 				} else if (isOutbox) {
-					const actorJson = fs.readFileSync(outputPath + '/actor.json', 'utf-8');
+					const actorJson = await fsp.readFile(outputPath + '/actor.json', 'utf-8');
 					const actor = JSON.parse(actorJson);
 					const isPleroma = actor['@context'].some((v: any) => typeof v === 'string' && v.match(/litepub(.*)/));
 					if (isPleroma) {
-						const outboxJson = fs.readFileSync(outputPath + '/outbox.json', 'utf-8');
+						const outboxJson = await fsp.readFile(outputPath + '/outbox.json', 'utf-8');
 						const outbox = JSON.parse(outboxJson);
 						const processedToots = await this.recreateChain(['object', 'id'], ['object', 'inReplyTo'], outbox.orderedItems.filter((x: any) => x.type === 'Create' && x.object.type === 'Note'), true);
 						this.queueService.createImportPleroToDbJob(job.data.user, processedToots, null);
 					} else {
-						const outboxJson = fs.readFileSync(outputPath + '/outbox.json', 'utf-8');
+						const outboxJson = await fsp.readFile(outputPath + '/outbox.json', 'utf-8');
 						const outbox = JSON.parse(outboxJson);
 						let mastoFolder = await this.driveFoldersRepository.findOneBy({ name: 'Mastodon', userId: job.data.user.id, parentId: folder?.id });
 						if (mastoFolder == null && folder) {
@ -302,7 +297,7 @@ export class ImportNotesProcessorService {
 			this.logger.info(`Temp dir is ${path}`);

 			try {
-				fs.writeFileSync(path, '', 'utf-8');
+				await fsp.writeFile(path, '', 'utf-8');
 				await this.downloadService.downloadUrl(file.url, path);
 			} catch (e) { // TODO: 何度か再試行
 				if (e instanceof Error || typeof e === 'string') {
@ -311,7 +306,7 @@ export class ImportNotesProcessorService {
 				throw e;
 			}

-			const notesJson = fs.readFileSync(path, 'utf-8');
+			const notesJson = await fsp.readFile(path, 'utf-8');
 			const notes = JSON.parse(notesJson);
 			const processedNotes = await this.recreateChain(['id'], ['replyId'], notes, false);
 			this.queueService.createImportKeyNotesToDbJob(job.data.user, processedNotes, null);
@ -590,7 +585,8 @@ export class ImportNotesProcessorService {

 		try {
 			const date = new Date(tweet.created_at);
-			const textReplaceURLs = tweet.entities.urls && tweet.entities.urls.length > 0 ? await replaceTwitterUrls(tweet.full_text, tweet.entities.urls) : tweet.full_text;
+			const decodedText = tweet.full_text.replaceAll('&gt;', '>').replaceAll('&lt;', '<').replaceAll('&amp;', '&');
+			const textReplaceURLs = tweet.entities.urls && tweet.entities.urls.length > 0 ? await replaceTwitterUrls(decodedText, tweet.entities.urls) : decodedText;
 			const text = tweet.entities.user_mentions && tweet.entities.user_mentions.length > 0 ? await replaceTwitterMentions(textReplaceURLs, tweet.entities.user_mentions) : textReplaceURLs;
 			const files: MiDriveFile[] = [];