Skip to content

Commit

Permalink
Merge pull request #1923 from matthiasn/feat/image_analysis
Browse files: browse the repository at this point in the history.
feat: add image analysis with llama3.2-vision
  • Loading branch information
matthiasn authored Feb 3, 2025
2 parents 9a246cd + 19641a1 commit fb86cbe
Show file tree
Hide file tree
Showing 34 changed files with 968 additions and 277 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ class OllamaRepository {
Stream<CompletionChunk> generate(
String prompt, {
required String model,
required String system,
required double temperature,
String? system,
List<String>? images,
}) {
return _ollama.generate(
Expand Down
81 changes: 81 additions & 0 deletions lib/features/ai/state/ollama_image_analysis.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import 'dart:async';
import 'dart:convert';
import 'dart:io';

import 'package:lotti/classes/journal_entities.dart';
import 'package:lotti/features/ai/repository/ollama_repository.dart';
import 'package:lotti/features/journal/state/entry_controller.dart';
import 'package:lotti/utils/cache_extension.dart';
import 'package:lotti/utils/image_utils.dart';
import 'package:riverpod_annotation/riverpod_annotation.dart';

part 'ollama_image_analysis.g.dart';

/// Generates a textual description of a journal image with a local
/// multimodal model and appends it to the image entry.
///
/// The provider state is the model response accumulated so far, which lets
/// listeners render the text while it is still streaming. The state starts
/// out as an empty string.
@riverpod
class AiImageAnalysisController extends _$AiImageAnalysisController {
  /// Initializes the controller for the entry with the given [id].
  ///
  /// Returns the empty string as the initial state.
  @override
  String build({
    required String id,
  }) {
    ref.cacheFor(entryCacheDuration);
    return '';
  }

  /// Runs the image analysis for the entry identified by `id`.
  ///
  /// Does nothing when the entry is not a [JournalImage]. Otherwise the entry
  /// is saved, the image is sent to the model together with a fixed prompt,
  /// and every received chunk is appended to the provider state. Once the
  /// stream completes, the response, prefixed with a disclaimer, is handed
  /// to the entry controller's `addTextToImage`. When the model returns no
  /// text at all, nothing is added to the entry.
  Future<void> analyzeImage() async {
    final provider = entryControllerProvider(id: id);
    final notifier = ref.read(provider.notifier);

    // Use read rather than watch: this is an imperative call, and watching
    // here would subscribe this notifier to the entry provider, so the
    // save/addTextToImage calls below could rebuild it and reset the state
    // to '' while the response is still streaming.
    final entry = ref.read(provider).value?.entry;

    if (entry is! JournalImage) {
      return;
    }

    await notifier.save();

    // Clear the output of any previous run before streaming a new one.
    state = '';

    const prompt =
        'Describe the image in detail, including its content, style, and any '
        'relevant information that can be gleaned from the image. '
        'If the image is the screenshot of a website, then focus on the '
        'the content of the website. Do not make up names. ';

    final buffer = StringBuffer();
    final image = await getImage(entry);

    const model = 'llama3.2-vision:latest'; // TODO: make configurable
    const temperature = 0.6;

    final stream = ref.read(ollamaRepositoryProvider).generate(
      prompt,
      model: model,
      temperature: temperature,
      images: [image],
    );

    await for (final chunk in stream) {
      buffer.write(chunk.text);
      state = buffer.toString();
    }

    // Build the final text from the local buffer instead of the provider
    // state, so the result does not depend on the state staying untouched.
    final response = buffer.toString();

    // Avoid appending a disclaimer that is followed by no analysis at all.
    if (response.trim().isEmpty) {
      return;
    }

    final completeResponse =
        '```\nDisclaimer: the remainder of this entry until the next linked entry '
        "was generated by a multimodal AI model analysing the entry's image. "
        'Therefore, it may contain inaccuracies or errors. '
        'Please double-check the information before using it. '
        'If there are similar concepts to what is discussed in the history '
        'of a task, take the information into account but always assume that '
        'the information outside of this section is more accurate. \n```'
        '\n\n$response';

    await notifier.addTextToImage(completeResponse);
  }

  /// Returns the contents of the file backing [image] as a base64 string.
  ///
  /// The file location is resolved with `getFullImagePath`. Errors from
  /// reading the file are not caught here and propagate to the caller.
  Future<String> getImage(JournalImage image) async {
    final fullPath = getFullImagePath(image);
    final bytes = await File(fullPath).readAsBytes();

    return base64Encode(bytes);
  }
}
177 changes: 177 additions & 0 deletions lib/features/ai/state/ollama_image_analysis.g.dart

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 0 additions & 52 deletions lib/features/ai/state/ollama_prompt.dart

This file was deleted.

24 changes: 1 addition & 23 deletions lib/features/ai/state/ollama_task_summary.dart
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
import 'dart:async';
import 'dart:convert';
import 'dart:io';

import 'package:lotti/classes/entity_definitions.dart';
import 'package:lotti/classes/journal_entities.dart';
import 'package:lotti/database/database.dart';
import 'package:lotti/features/ai/repositories/ollama_repository.dart';
import 'package:lotti/features/ai/repository/ollama_repository.dart';
import 'package:lotti/features/journal/util/entry_tools.dart';
import 'package:lotti/get_it.dart';
import 'package:lotti/logic/persistence_logic.dart';
import 'package:lotti/services/entities_cache_service.dart';
import 'package:lotti/utils/image_utils.dart';
import 'package:riverpod_annotation/riverpod_annotation.dart';

part 'ollama_task_summary.g.dart';
Expand All @@ -22,7 +19,6 @@ class AiTaskSummaryController extends _$AiTaskSummaryController {
@override
String build({
required String id,
required bool processImages,
}) {
summarizeEntry();
return '';
Expand Down Expand Up @@ -58,8 +54,6 @@ class AiTaskSummaryController extends _$AiTaskSummaryController {
'Calculate total time spent on the task. ';

final buffer = StringBuffer();
final images =
processImages && entry is Task ? await getImages(entry) : null;

const model = 'deepseek-r1:8b'; // TODO: make configurable
const temperature = 0.6;
Expand All @@ -69,7 +63,6 @@ class AiTaskSummaryController extends _$AiTaskSummaryController {
model: model,
system: systemMessage,
temperature: temperature,
images: images,
);

await for (final chunk in stream) {
Expand All @@ -96,21 +89,6 @@ class AiTaskSummaryController extends _$AiTaskSummaryController {
categoryId: entry?.categoryId,
);
}

/// Returns the base64-encoded file contents of every [JournalImage] that is
/// linked to the entry with this controller's `id`.
///
/// The [task] argument is not read; the linked entities are looked up by
/// `id`. Files are read one after another, in the order in which the linked
/// entities are returned.
Future<List<String>> getImages(Task task) async {
  final linked = await _db.getLinkedEntities(id);

  return [
    for (final linkedImage in linked.whereType<JournalImage>())
      base64Encode(await File(getFullImagePath(linkedImage)).readAsBytes()),
  ];
}
}

@riverpod
Expand Down
Loading

0 comments on commit fb86cbe

Please sign in to comment.