usomaru技術ぶろぐ

学んだことをちょこちょこメモ( ..)φ

Azure AI Document Intelligence(旧Form Recognizer)を使って請求書をAI-OCRできるWebAPIを作成

はじめに

ずっと「Form Recognizer」って呼んでいたのですが、いつの間にか名前が変わってました…
Document Intelligence Studioを使うと、Resultで返却されるJSONの項目数が非常に多くなります。そこで、OCRしたいファイルを送ると必要な項目だけがすっきりと返却されるWebAPIを、簡易的に作成してみました。

環境と言語

Azure Portalでリソース作成

リソースの作成手順については、公式ドキュメントを参考にしてください。ちなみに、2023年8月21日時点では、リソースの検索で「Form Recognizer」と入力しないと表示されませんでした。

WebAPI作成

手順としては以下の通りです。

  1. プロジェクト作成

  2. nuget追加
    Azure.AI.FormRecognizerを追加します。

  3. ソースコード追加
    ※請求書モデルを使用しています。

  4. Web Appsに発行

ソースコード

OCR処理部分
_keyと_endPointには、作成したForm RecognizerリソースのKeyとEndpointをそれぞれ設定します。

/// <summary>
/// Sends the uploaded file to the "prebuilt-invoice" model and waits for the analysis to complete.
/// </summary>
/// <param name="file">Uploaded file whose read stream is submitted for analysis.</param>
/// <returns>
/// The analysis result of the completed operation, or <c>null</c> when no operation object is returned.
/// </returns>
public async Task<AnalyzeResult?> GetInvoiceData(IFormFile file)
{
    // _endPoint and _key identify the Form Recognizer resource the request is sent to.
    var client = new DocumentAnalysisClient(new Uri(_endPoint), new AzureKeyCredential(_key));

    // The stream stays open until the method returns, i.e. until the analysis has completed.
    using var invoiceStream = file.OpenReadStream();

    AnalyzeDocumentOperation operation =
        await client.AnalyzeDocumentAsync(WaitUntil.Completed, "prebuilt-invoice", invoiceStream);

    return operation?.Value;
}

OCR処理の結果からモデルを作成している部分

/// <summary>
/// Runs invoice analysis on the uploaded file and converts the first analyzed document
/// into the response model.
/// </summary>
/// <param name="invoiceFile">Uploaded invoice file that is passed to the repository.</param>
/// <returns>
/// The formatted fields of the first analyzed document, or an empty <see cref="AnalyzeData"/>
/// when the repository returns no result or the result contains no documents.
/// </returns>
public async Task<AnalyzeData> GetAnalysisOfInvoiceData(IFormFile invoiceFile)
{
    var analyzeResult = await _formRecognizerRepository.GetInvoiceData(invoiceFile);

    // Guard before indexing: an empty Documents list would make Documents[0] throw.
    // NOTE(review): presumably the list is empty when the service finds no invoice in the file.
    if (analyzeResult == null || analyzeResult.Documents == null || analyzeResult.Documents.Count == 0)
    {
        return new AnalyzeData();
    }

    // Only the first document is used; any further documents in the result are ignored.
    AnalyzedDocument document = analyzeResult.Documents[0];
    var documentList = document.Fields.ToList();

    return FormatAnalysis(documentList);
}

/// <summary>
/// Splits the analyzed fields into plain fields and the "Items" table and builds the response model.
/// </summary>
/// <param name="documentList">Key/field pairs taken from the analyzed document.</param>
/// <returns>
/// An <see cref="AnalyzeData"/> whose <c>TableItems</c> comes from the "Items" field (if present)
/// and whose <c>FieldDatas</c> holds every other field.
/// </returns>
private AnalyzeData FormatAnalysis(List<KeyValuePair<string, DocumentField>> documentList)
{
    var result = new AnalyzeData();
    var plainFields = new List<Models.FieldData>();

    foreach (var entry in documentList)
    {
        // The "Items" field carries the line-item table and is formatted separately.
        if (entry.Key == "Items")
        {
            result.TableItems = FormatAnalysisOfTableItems(entry.Value);
            continue;
        }

        plainFields.Add(new Models.FieldData
        {
            Key = entry.Key,
            Content = FormatAnalysisOfContent(entry.Value),
            Confidence = entry.Value.Confidence,
        });
    }

    result.FieldDatas = plainFields;
    return result;
}

/// <summary>
/// Converts a recognized field into the string stored in the response model,
/// choosing the conversion by the field's type name.
/// </summary>
/// <param name="documentField">Field to convert; a null field yields an empty string.</param>
/// <returns>
/// The field's text. An empty string is returned when the field is null, when the
/// converted value is null, or when the field type is not one of the handled cases.
/// </returns>
private string FormatAnalysisOfContent(DocumentField documentField)
{
    if (documentField == null)
    {
        return "";
    }

    // Value-type conversions below call the parameterless ToString(), so their
    // formatting follows the current culture of the process.
    string? text;
    switch (documentField.FieldType.ToString())
    {
        case "Address":
            // Addresses use the raw recognized text with line breaks removed.
            // Content may be null, so avoid dereferencing it directly.
            text = documentField.Content?.Replace("\n", "");
            break;
        case "CountryRegion":
            text = documentField.Value.AsCountryRegion();
            break;
        case "Currency":
            // Only the numeric amount is reported; symbol and code are dropped.
            text = documentField.Value.AsCurrency().Amount.ToString();
            break;
        case "Date":
            text = documentField.Value.AsDate().ToString();
            break;
        case "Double":
            text = documentField.Value.AsDouble().ToString();
            break;
        case "Int64":
            text = documentField.Value.AsInt64().ToString();
            break;
        case "PhoneNumber":
            text = documentField.Value.AsPhoneNumber();
            break;
        case "SelectionMark":
            text = documentField.Value.AsSelectionMarkState().ToString();
            break;
        case "Signature":
            text = documentField.Value.AsSignatureType().ToString();
            break;
        case "String":
            text = documentField.Value.AsString();
            break;
        case "Time":
            text = documentField.Value.AsTime().ToString();
            break;
        case "Unknown":
            // Report the recognized text. Calling ToString() on the field object itself
            // does not produce the field's text.
            // NOTE(review): confirm DocumentField has no ToString override in the SDK version in use.
            text = documentField.Content;
            break;
        default:
            // Other field types are not converted here and are reported as an empty string.
            text = null;
            break;
    }

    return text ?? "";
}

/// <summary>
/// Converts the list-valued "Items" field into table rows for the response model.
/// </summary>
/// <param name="documentField">Field whose value is read as a list of row fields.</param>
/// <returns>
/// One <see cref="TableItemsData"/> per list entry, in list order. A row gets its cells and a
/// 1-based <c>No</c> only when the entry is non-null and its dictionary value is non-null;
/// otherwise the row is added with its default values.
/// </returns>
private List<TableItemsData> FormatAnalysisOfTableItems(DocumentField documentField)
{
    var rows = documentField.Value.AsList();
    var result = new List<TableItemsData>();
    var rowNumber = 0;

    foreach (var row in rows)
    {
        rowNumber++;
        var tableRow = new TableItemsData();

        // A null entry is treated the same as an entry without a dictionary value.
        var cells = row?.Value.AsDictionary();
        if (cells != null)
        {
            var rowFields = new List<Models.FieldData>();
            foreach (var cell in cells)
            {
                rowFields.Add(new Models.FieldData
                {
                    Key = cell.Key,
                    Content = FormatAnalysisOfContent(cell.Value),
                    Confidence = cell.Value.Confidence,
                });
            }

            tableRow.Items = rowFields;
            tableRow.No = rowNumber;
        }

        result.Add(tableRow);
    }

    return result;
}

Controller

/// <summary>
/// Accepts an uploaded invoice file and returns the analyzed fields.
/// </summary>
/// <param name="file">Uploaded invoice file.</param>
/// <returns>
/// 200 with the analyzed data, or 400 when no file (or an empty file) was uploaded
/// or the service returns no data.
/// </returns>
[HttpPost]
public async Task<ActionResult<AnalyzeData>> GetInvoiceData(IFormFile file)
{
    // Reject missing or empty uploads here instead of letting them reach the OCR call.
    if (file == null || file.Length == 0)
    {
        return StatusCode((int)HttpStatusCode.BadRequest);
    }

    var res = await _invoiceDataService.GetAnalysisOfInvoiceData(file);
    if (res == null)
    {
        return StatusCode((int)HttpStatusCode.BadRequest);
    }

    return Ok(res);
}

出力されるモデル

/// <summary>
/// Response model: the plain fields of the analyzed document plus its table rows.
/// </summary>
public class AnalyzeData
{
    /// <summary>Fields other than the table; empty by default.</summary>
    public List<FieldData> FieldDatas { get; set; }
        = new List<FieldData>();

    /// <summary>Table rows; empty by default.</summary>
    public List<TableItemsData> TableItems { get; set; }
        = new List<TableItemsData>();
}

/// <summary>
/// A single field in the response: its name, its text and its confidence.
/// </summary>
public class FieldData
{
    /// <summary>Field name; empty by default.</summary>
    public string Key { get; set; } = "";

    /// <summary>Field text; empty by default.</summary>
    public string Content { get; set; } = "";

    /// <summary>Confidence copied from the analyzed field; 0 by default, may be null.</summary>
    public float? Confidence { get; set; } = 0f;
}

/// <summary>
/// One table row in the response: its row number and its cells.
/// </summary>
public class TableItemsData
{
    /// <summary>Row number; 0 until a number is assigned.</summary>
    public int No { get; set; }

    /// <summary>Cells of the row; empty by default.</summary>
    public List<FieldData> Items { get; set; }
        = new List<FieldData>();
}

結果

Microsoft公式が提供している請求書PDFで試してみました。 https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-invoice.pdf

{
    "fieldDatas": [
        {
            "key": "AmountDue",
            "content": "610",
            "confidence": 0.958
        },
        {
            "key": "BillingAddress",
            "content": "123 Bill St,Redmond WA, 98052",
            "confidence": 0.894
        },
        {
            "key": "BillingAddressRecipient",
            "content": "Microsoft Finance",
            "confidence": 0.936
        },
        {
            "key": "CustomerAddress",
            "content": "123 Other St, Redmond WA, 98052",
            "confidence": 0.892
        },
        {
            "key": "CustomerAddressRecipient",
            "content": "Microsoft Corp",
            "confidence": 0.934
        },
        {
            "key": "CustomerId",
            "content": "CID-12345",
            "confidence": 0.962
        },
        {
            "key": "CustomerName",
            "content": "MICROSOFT CORPORATION",
            "confidence": 0.912
        },
        {
            "key": "DueDate",
            "content": "12/15/2019 12:00:00 AM +00:00",
            "confidence": 0.974
        },
        {
            "key": "InvoiceDate",
            "content": "11/15/2019 12:00:00 AM +00:00",
            "confidence": 0.975
        },
        {
            "key": "InvoiceId",
            "content": "INV-100",
            "confidence": 0.973
        },
        {
            "key": "InvoiceTotal",
            "content": "110",
            "confidence": 0.972
        },
        {
            "key": "PreviousUnpaidBalance",
            "content": "500",
            "confidence": 0.954
        },
        {
            "key": "PurchaseOrder",
            "content": "PO-3333",
            "confidence": 0.956
        },
        {
            "key": "RemittanceAddress",
            "content": "123 Remit StNew York, NY, 10001",
            "confidence": 0.889
        },
        {
            "key": "RemittanceAddressRecipient",
            "content": "Contoso Billing",
            "confidence": 0.935
        },
        {
            "key": "ServiceAddress",
            "content": "123 Service St, Redmond WA, 98052",
            "confidence": 0.889
        },
        {
            "key": "ServiceAddressRecipient",
            "content": "Microsoft Services",
            "confidence": 0.934
        },
        {
            "key": "ShippingAddress",
            "content": "123 Ship St,Redmond WA, 98052",
            "confidence": 0.893
        },
        {
            "key": "ShippingAddressRecipient",
            "content": "Microsoft Delivery",
            "confidence": 0.933
        },
        {
            "key": "SubTotal",
            "content": "100",
            "confidence": 0.975
        },
        {
            "key": "TotalTax",
            "content": "10",
            "confidence": 0.974
        },
        {
            "key": "VendorAddress",
            "content": "123 456th St New York, NY, 10001",
            "confidence": 0.889
        },
        {
            "key": "VendorAddressRecipient",
            "content": "Contoso Headquarters",
            "confidence": 0.934
        },
        {
            "key": "VendorName",
            "content": "CONTOSO LTD.",
            "confidence": 0.932
        }
    ],
    "tableItems": [
        {
            "no": 1,
            "items": [
                {
                    "key": "Amount",
                    "content": "100",
                    "confidence": 0.905
                },
                {
                    "key": "Description",
                    "content": "Test for 23 fields",
                    "confidence": 0.902
                },
                {
                    "key": "Quantity",
                    "content": "1",
                    "confidence": 0.902
                },
                {
                    "key": "UnitPrice",
                    "content": "1",
                    "confidence": 0.88
                }
            ]
        }
    ]
}

参考